#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <locale.h>
#include "linear.h"
#include "newton.h"
int liblinear_version = LIBLINEAR_VERSION;
typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
    dst = new T[n];
    memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define INF HUGE_VAL
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))

static void print_string_stdout(const char *s)
{
    fputs(s,stdout);
    fflush(stdout);
}
static void print_null(const char *s) {}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
    char buf[BUFSIZ];
    va_list ap;
    va_start(ap,fmt);
    vsnprintf(buf,sizeof(buf),fmt,ap); // bounded, so a long message cannot overflow buf
    va_end(ap);
    (*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif
class sparse_operator
{
public:
    static double nrm2_sq(const feature_node *x)
    {
        double ret = 0;
        while(x->index != -1)
        {
            ret += x->value*x->value;
            x++;
        }
        return ret;
    }

    static double dot(const double *s, const feature_node *x)
    {
        double ret = 0;
        while(x->index != -1)
        {
            ret += s[x->index-1]*x->value;
            x++;
        }
        return ret;
    }

    static double sparse_dot(const feature_node *x1, const feature_node *x2)
    {
        double ret = 0;
        while(x1->index != -1 && x2->index != -1)
        {
            if(x1->index == x2->index)
            {
                ret += x1->value * x2->value;
                ++x1;
                ++x2;
            }
            else
            {
                if(x1->index > x2->index)
                    ++x2;
                else
                    ++x1;
            }
        }
        return ret;
    }

    static void axpy(const double a, const feature_node *x, double *y)
    {
        while(x->index != -1)
        {
            y[x->index-1] += a*x->value;
            x++;
        }
    }
};
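
// Illustrative sketch (not part of the library): feature vectors are 1-based
// (index,value) pairs terminated by index = -1, so the sparse vector
// [0.5, 0, 2.0] with a dense s = (1,1,1) behaves as
//
//   feature_node v[] = {{1,0.5},{3,2.0},{-1,0}};
//   double s[3] = {1,1,1};
//   double d = sparse_operator::dot(s, v); // 0.5*1 + 2.0*1 = 2.5
//   sparse_operator::axpy(2.0, v, s);      // s becomes (2,1,5)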

// L2-regularized empirical risk minimization
// min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss

class l2r_erm_fun: public function
{
public:
    l2r_erm_fun(const problem *prob, const parameter *param, double *C);
    ~l2r_erm_fun();

    double fun(double *w);
    double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha);
    int get_nr_variable(void);

protected:
    virtual double C_times_loss(int i, double wx_i) = 0;
    void Xv(double *v, double *Xv);
    void XTv(double *v, double *XTv);

    double *C;
    const problem *prob;
    double *wx;
    double *tmp; // a working array
    double wTw;
    int regularize_bias;
};

l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C)
{
    int l=prob->l;

    this->prob = prob;

    wx = new double[l];
    tmp = new double[l];
    this->C = C;
    this->regularize_bias = param->regularize_bias;
}

l2r_erm_fun::~l2r_erm_fun()
{
    delete[] wx;
    delete[] tmp;
}

double l2r_erm_fun::fun(double *w)
{
    int i;
    double f=0;
    int l=prob->l;
    int w_size=get_nr_variable();

    wTw = 0;
    Xv(w, wx);

    for(i=0;i<w_size;i++)
        wTw += w[i]*w[i];
    if(regularize_bias == 0)
        wTw -= w[w_size-1]*w[w_size-1];
    for(i=0;i<l;i++)
        f += C_times_loss(i, wx[i]);
    f = f + 0.5 * wTw;

    return f;
}

int l2r_erm_fun::get_nr_variable(void)
{
    return prob->n;
}

// On entry *f must be the function value of w
// On exit w is updated and *f is the new function value
double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
{
    int i;
    int l = prob->l;
    double sTs = 0;
    double wTs = 0;
    double gTs = 0;
    double eta = 0.01;
    int w_size = get_nr_variable();
    int max_num_linesearch = 20;
    double fold = *f;
    Xv(s, tmp);

    for (i=0;i<w_size;i++)
    {
        sTs += s[i] * s[i];
        wTs += s[i] * w[i];
        gTs += s[i] * g[i];
    }
    if(regularize_bias == 0)
    {
        // bias not used in calculating (w + \alpha s)^T (w + \alpha s)
        sTs -= s[w_size-1] * s[w_size-1];
        wTs -= s[w_size-1] * w[w_size-1];
    }

    int num_linesearch = 0;
    for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
    {
        double loss = 0;
        for(i=0;i<l;i++)
        {
            double inner_product = tmp[i] * alpha + wx[i];
            loss += C_times_loss(i, inner_product);
        }
        *f = loss + (alpha * alpha * sTs + wTw) / 2.0 + alpha * wTs;
        if (*f - fold <= eta * alpha * gTs)
        {
            for (i=0;i<l;i++)
                wx[i] += alpha * tmp[i];
            break;
        }
        else
            alpha *= 0.5;
    }

    if (num_linesearch >= max_num_linesearch)
    {
        *f = fold;
        return 0;
    }
    else
        for (i=0;i<w_size;i++)
            w[i] += alpha * s[i];

    wTw += alpha * alpha * sTs + 2*alpha * wTs;
    return alpha;
}
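
// Illustrative arithmetic (not part of the library): with eta = 0.01,
// f(w) = 1.0 and g^T s = -2, the trial alpha = 1 is accepted only if
// f(w + s) <= 1.0 + 0.01*1*(-2) = 0.98; otherwise alpha is halved and the
// sufficient-decrease threshold shrinks with it (0.99 at alpha = 0.5, ...).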

void l2r_erm_fun::Xv(double *v, double *Xv)
{
    int i;
    int l=prob->l;
    feature_node **x=prob->x;

    for(i=0;i<l;i++)
        Xv[i]=sparse_operator::dot(v, x[i]);
}

void l2r_erm_fun::XTv(double *v, double *XTv)
{
    int i;
    int l=prob->l;
    int w_size=get_nr_variable();
    feature_node **x=prob->x;

    for(i=0;i<w_size;i++)
        XTv[i]=0;
    for(i=0;i<l;i++)
        sparse_operator::axpy(v[i], x[i], XTv);
}

class l2r_lr_fun: public l2r_erm_fun
{
public:
    l2r_lr_fun(const problem *prob, const parameter *param, double *C);
    ~l2r_lr_fun();

    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    void get_diag_preconditioner(double *M);

private:
    double *D;
    double C_times_loss(int i, double wx_i);
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C):
    l2r_erm_fun(prob, param, C)
{
    int l=prob->l;
    D = new double[l];
}

l2r_lr_fun::~l2r_lr_fun()
{
    delete[] D;
}

double l2r_lr_fun::C_times_loss(int i, double wx_i)
{
    double ywx_i = wx_i * prob->y[i];
    if (ywx_i >= 0)
        return C[i]*log(1 + exp(-ywx_i));
    else
        return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
}
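
// The branch above keeps log(1 + exp(.)) numerically stable: e.g., for
// y_i w^T x_i = -1000, exp(1000) overflows to inf, while the second branch
// returns C[i]*(1000 + log(1 + exp(-1000))) = 1000*C[i] exactly.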

void l2r_lr_fun::grad(double *w, double *g)
{
    int i;
    double *y=prob->y;
    int l=prob->l;
    int w_size=get_nr_variable();

    for(i=0;i<l;i++)
    {
        tmp[i] = 1/(1 + exp(-y[i]*wx[i]));
        D[i] = tmp[i]*(1-tmp[i]);
        tmp[i] = C[i]*(tmp[i]-1)*y[i];
    }
    XTv(tmp, g);

    for(i=0;i<w_size;i++)
        g[i] = w[i] + g[i];
    if(regularize_bias == 0)
        g[w_size-1] -= w[w_size-1];
}

void l2r_lr_fun::get_diag_preconditioner(double *M)
{
    int i;
    int l = prob->l;
    int w_size=get_nr_variable();
    feature_node **x = prob->x;

    for (i=0; i<w_size; i++)
        M[i] = 1;
    if(regularize_bias == 0)
        M[w_size-1] = 0;

    for (i=0; i<l; i++)
    {
        feature_node *xi = x[i];
        while (xi->index!=-1)
        {
            M[xi->index-1] += xi->value*xi->value*C[i]*D[i];
            xi++;
        }
    }
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
    int i;
    int l=prob->l;
    int w_size=get_nr_variable();
    feature_node **x=prob->x;

    for(i=0;i<w_size;i++)
        Hs[i] = 0;
    for(i=0;i<l;i++)
    {
        feature_node * const xi=x[i];
        double xTs = sparse_operator::dot(s, xi);

        xTs = C[i]*D[i]*xTs;

        sparse_operator::axpy(xTs, xi, Hs);
    }
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + Hs[i];
    if(regularize_bias == 0)
        Hs[w_size-1] -= s[w_size-1];
}

class l2r_l2_svc_fun: public l2r_erm_fun
{
public:
    l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C);
    ~l2r_l2_svc_fun();

    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    void get_diag_preconditioner(double *M);

protected:
    void subXTv(double *v, double *XTv);

    int *I;
    int sizeI;

private:
    double C_times_loss(int i, double wx_i);
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C):
    l2r_erm_fun(prob, param, C)
{
    I = new int[prob->l];
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
    delete[] I;
}

double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
{
    double d = 1 - prob->y[i] * wx_i;
    if (d > 0)
        return C[i] * d * d;
    else
        return 0;
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
    int i;
    double *y=prob->y;
    int l=prob->l;
    int w_size=get_nr_variable();

    sizeI = 0;
    for (i=0;i<l;i++)
    {
        tmp[i] = wx[i] * y[i];
        if (tmp[i] < 1)
        {
            tmp[sizeI] = C[i]*y[i]*(tmp[i]-1);
            I[sizeI] = i;
            sizeI++;
        }
    }
    subXTv(tmp, g);

    for(i=0;i<w_size;i++)
        g[i] = w[i] + 2*g[i];
    if(regularize_bias == 0)
        g[w_size-1] -= w[w_size-1];
}

void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
{
    int i;
    int w_size=get_nr_variable();
    feature_node **x = prob->x;

    for (i=0; i<w_size; i++)
        M[i] = 1;
    if(regularize_bias == 0)
        M[w_size-1] = 0;

    for (i=0; i<sizeI; i++)
    {
        int idx = I[i];
        feature_node *xi = x[idx];
        while (xi->index!=-1)
        {
            M[xi->index-1] += xi->value*xi->value*C[idx]*2;
            xi++;
        }
    }
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
    int i;
    int w_size=get_nr_variable();
    feature_node **x=prob->x;

    for(i=0;i<w_size;i++)
        Hs[i]=0;
    for(i=0;i<sizeI;i++)
    {
        feature_node * const xi=x[I[i]];
        double xTs = sparse_operator::dot(s, xi);

        xTs = C[I[i]]*xTs;

        sparse_operator::axpy(xTs, xi, Hs);
    }
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + 2*Hs[i];
    if(regularize_bias == 0)
        Hs[w_size-1] -= s[w_size-1];
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
    int i;
    int w_size=get_nr_variable();
    feature_node **x=prob->x;

    for(i=0;i<w_size;i++)
        XTv[i]=0;
    for(i=0;i<sizeI;i++)
        sparse_operator::axpy(v[i], x[I[i]], XTv);
}

class l2r_l2_svr_fun: public l2r_l2_svc_fun
{
public:
    l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C);

    void grad(double *w, double *g);

private:
    double C_times_loss(int i, double wx_i);
    double p;
};

l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C):
    l2r_l2_svc_fun(prob, param, C)
{
    this->p = param->p;
    this->regularize_bias = param->regularize_bias;
}

double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
{
    double d = wx_i - prob->y[i];
    if(d < -p)
        return C[i]*(d+p)*(d+p);
    else if(d > p)
        return C[i]*(d-p)*(d-p);
    return 0;
}

void l2r_l2_svr_fun::grad(double *w, double *g)
{
    int i;
    double *y=prob->y;
    int l=prob->l;
    int w_size=get_nr_variable();
    double d;

    sizeI = 0;
    for(i=0;i<l;i++)
    {
        d = wx[i] - y[i];

        // generate index set I
        if(d < -p)
        {
            tmp[sizeI] = C[i]*(d+p);
            I[sizeI] = i;
            sizeI++;
        }
        else if(d > p)
        {
            tmp[sizeI] = C[i]*(d-p);
            I[sizeI] = i;
            sizeI++;
        }

    }
    subXTv(tmp, g);

    for(i=0;i<w_size;i++)
        g[i] = w[i] + 2*g[i];
    if(regularize_bias == 0)
        g[w_size-1] -= w[w_size-1];
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
// min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
// s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
// where e^m_i = 0 if y_i = m,
//       e^m_i = 1 if y_i != m,
//       C^m_i = C if m = y_i,
//       C^m_i = 0 if m != y_i,
// and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)

#define GETI(i) ((int) prob->y[i])
// To support weights for instances, use GETI(i) (i)
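
// Typical call sequence (sketch):
//   Solver_MCSVM_CS solver(prob, nr_class, weighted_C, param->eps);
//   solver.Solve(w); // w holds w_size*nr_class entries, nr_class per feature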

class Solver_MCSVM_CS
{
public:
    Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
    ~Solver_MCSVM_CS();
    void Solve(double *w);
private:
    void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
    bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
    double *B, *C, *G;
    int w_size, l;
    int nr_class;
    int max_iter;
    double eps;
    const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
    this->w_size = prob->n;
    this->l = prob->l;
    this->nr_class = nr_class;
    this->eps = eps;
    this->max_iter = max_iter;
    this->prob = prob;
    this->B = new double[nr_class];
    this->G = new double[nr_class];
    this->C = weighted_C;
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
    delete[] B;
    delete[] G;
}

int compare_double(const void *a, const void *b)
{
    if(*(double *)a > *(double *)b)
        return -1;
    if(*(double *)a < *(double *)b)
        return 1;
    return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
    int r;
    double *D;

    clone(D, B, active_i);
    if(yi < active_i)
        D[yi] += A_i*C_yi;
    qsort(D, active_i, sizeof(double), compare_double);

    double beta = D[0] - A_i*C_yi;
    for(r=1;r<active_i && beta<r*D[r];r++)
        beta += D[r];
    beta /= r;

    for(r=0;r<active_i;r++)
    {
        if(r == yi)
            alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
        else
            alpha_new[r] = min((double)0, (beta - B[r])/A_i);
    }
    delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
    double bound = 0;
    if(m == yi)
        bound = C[GETI(i)];
    if(alpha_i == bound && G[m] < minG)
        return true;
    return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
    int i, m, s;
    int iter = 0;
    double *alpha = new double[l*nr_class];
    double *alpha_new = new double[nr_class];
    int *index = new int[l];
    double *QD = new double[l];
    int *d_ind = new int[nr_class];
    double *d_val = new double[nr_class];
    int *alpha_index = new int[nr_class*l];
    int *y_index = new int[l];
    int active_size = l;
    int *active_size_i = new int[l];
    double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
    bool start_from_all = true;

    // Initial alpha can be set here. Note that
    // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
    // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
    // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
    // If initial alpha isn't zero, uncomment the for loop below to initialize w
    for(i=0;i<l*nr_class;i++)
        alpha[i] = 0;

    for(i=0;i<w_size*nr_class;i++)
        w[i] = 0;
    for(i=0;i<l;i++)
    {
        for(m=0;m<nr_class;m++)
            alpha_index[i*nr_class+m] = m;
        feature_node *xi = prob->x[i];
        QD[i] = 0;
        while(xi->index != -1)
        {
            double val = xi->value;
            QD[i] += val*val;

            // Uncomment the for loop if initial alpha isn't zero
            // for(m=0; m<nr_class; m++)
            //     w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
            xi++;
        }
        active_size_i[i] = nr_class;
        y_index[i] = (int)prob->y[i];
        index[i] = i;
    }

    while(iter < max_iter)
    {
        double stopping = -INF;
        for(i=0;i<active_size;i++)
        {
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        }
        for(s=0;s<active_size;s++)
        {
            i = index[s];
            double Ai = QD[i];
            double *alpha_i = &alpha[i*nr_class];
            int *alpha_index_i = &alpha_index[i*nr_class];

            if(Ai > 0)
            {
                for(m=0;m<active_size_i[i];m++)
                    G[m] = 1;
                if(y_index[i] < active_size_i[i])
                    G[y_index[i]] = 0;

                feature_node *xi = prob->x[i];
                while(xi->index!= -1)
                {
                    double *w_i = &w[(xi->index-1)*nr_class];
                    for(m=0;m<active_size_i[i];m++)
                        G[m] += w_i[alpha_index_i[m]]*(xi->value);
                    xi++;
                }

                double minG = INF;
                double maxG = -INF;
                for(m=0;m<active_size_i[i];m++)
                {
                    if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
                        minG = G[m];
                    if(G[m] > maxG)
                        maxG = G[m];
                }
                if(y_index[i] < active_size_i[i])
                    if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
                        minG = G[y_index[i]];

                for(m=0;m<active_size_i[i];m++)
                {
                    if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
                    {
                        active_size_i[i]--;
                        while(active_size_i[i]>m)
                        {
                            if(!be_shrunk(i, active_size_i[i], y_index[i],
                                alpha_i[alpha_index_i[active_size_i[i]]], minG))
                            {
                                swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
                                swap(G[m], G[active_size_i[i]]);
                                if(y_index[i] == active_size_i[i])
                                    y_index[i] = m;
                                else if(y_index[i] == m)
                                    y_index[i] = active_size_i[i];
                                break;
                            }
                            active_size_i[i]--;
                        }
                    }
                }

                if(active_size_i[i] <= 1)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }

                if(maxG-minG <= 1e-12)
                    continue;
                else
                    stopping = max(maxG - minG, stopping);

                for(m=0;m<active_size_i[i];m++)
                    B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];

                solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
                int nz_d = 0;
                for(m=0;m<active_size_i[i];m++)
                {
                    double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
                    alpha_i[alpha_index_i[m]] = alpha_new[m];
                    if(fabs(d) >= 1e-12)
                    {
                        d_ind[nz_d] = alpha_index_i[m];
                        d_val[nz_d] = d;
                        nz_d++;
                    }
                }

                xi = prob->x[i];
                while(xi->index != -1)
                {
                    double *w_i = &w[(xi->index-1)*nr_class];
                    for(m=0;m<nz_d;m++)
                        w_i[d_ind[m]] += d_val[m]*xi->value;
                    xi++;
                }
            }
        }

        iter++;
        if(iter % 10 == 0)
        {
            info(".");
        }

        if(stopping < eps_shrink)
        {
            if(stopping < eps && start_from_all == true)
                break;
            else
            {
                active_size = l;
                for(i=0;i<l;i++)
                    active_size_i[i] = nr_class;
                info("*");
                eps_shrink = max(eps_shrink/2, eps);
                start_from_all = true;
            }
        }
        else
            start_from_all = false;
    }

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\n");

    // calculate objective value
    double v = 0;
    int nSV = 0;
    for(i=0;i<w_size*nr_class;i++)
        v += w[i]*w[i];
    v = 0.5*v;
    for(i=0;i<l*nr_class;i++)
    {
        v += alpha[i];
        if(fabs(alpha[i]) > 0)
            nSV++;
    }
    for(i=0;i<l;i++)
        v -= alpha[i*nr_class+(int)prob->y[i]];
    info("Objective value = %lf\n",v);
    info("nSV = %d\n",nSV);

    delete [] alpha;
    delete [] alpha_new;
    delete [] index;
    delete [] QD;
    delete [] d_ind;
    delete [] d_val;
    delete [] alpha_index;
    delete [] y_index;
    delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
// s.t. 0 <= \alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
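
// Core coordinate update (sketch): with G = y_i w^T x_i - 1 + D_ii alpha_i,
//   alpha_i <- min(max(alpha_i - G/QD_i, 0), C), where QD_i = D_ii + x_i^T x_i,
//   w <- w + (alpha_i - alpha_i_old) y_i x_i,
// guarded below by a projected-gradient test used for shrinking and stopping.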

static int solve_l2r_l1l2_svc(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300)
{
    int l = prob->l;
    int w_size = prob->n;
    double eps = param->eps;
    int solver_type = param->solver_type;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = new double[l];
    int *index = new int[l];
    double *alpha = new double[l];
    schar *y = new schar[l];
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = INF;
    double PGmin_old = -INF;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {INF, 0, INF};
    if(solver_type == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
    }

    // Initial alpha can be set here. Note that
    // 0 <= alpha[i] <= upper_bound[GETI(i)]
    for(i=0; i<l; i++)
        alpha[i] = 0;

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        QD[i] = diag[GETI(i)];

        feature_node * const xi = prob->x[i];
        QD[i] += sparse_operator::nrm2_sq(xi);
        sparse_operator::axpy(y[i]*alpha[i], xi, w);

        index[i] = i;
    }

    while (iter < max_iter)
    {
        PGmax_new = -INF;
        PGmin_new = INF;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            const schar yi = y[i];
            feature_node * const xi = prob->x[i];

            G = yi*sparse_operator::dot(w, xi)-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = max(PGmax_new, PG);
            PGmin_new = min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;
                sparse_operator::axpy(d, xi, w);
            }
        }

        iter++;
        if(iter % 10 == 0)
            info(".");

        if(PGmax_new - PGmin_new <= eps &&
            fabs(PGmax_new) <= eps && fabs(PGmin_new) <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                info("*");
                PGmax_old = INF;
                PGmin_old = -INF;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = INF;
        if (PGmin_old >= 0)
            PGmin_old = -INF;
    }

    info("\noptimization finished, #iter = %d\n",iter);

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    info("Objective value = %lf\n",v/2);
    info("nSV = %d\n",nSV);

    delete [] QD;
    delete [] alpha;
    delete [] y;
    delete [] index;

    return iter;
}


// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
// min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
// s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
//
// where Qij = xi^T xj and
// diag(lambda) is the diagonal matrix with lambda_i on the diagonal
//
// In L1-SVM case:
//     upper_bound_i = C
//     lambda_i = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     lambda_i = 1/(2*C)
//
// Given:
// x, y, p, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Algorithm 4 of Ho and Lin, 2012

#undef GETI
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)
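
// One-variable update (sketch): with G = w^T x_i - y_i + lambda_i beta_i and
// H = x_i^T x_i + lambda_i, the Newton direction computed below is
//   d = -(G+p)/H if G+p < H*beta_i, d = -(G-p)/H if G-p > H*beta_i,
//   d = -beta_i otherwise (beta_i moves to 0),
// after which beta_i is clipped to [-upper_bound, upper_bound].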

static int solve_l2r_l1l2_svr(const problem *prob, const parameter *param, double *w, int max_iter=300)
{
    const int solver_type = param->solver_type;
    int l = prob->l;
    double C = param->C;
    double p = param->p;
    int w_size = prob->n;
    double eps = param->eps;
    int i, s, iter = 0;
    int active_size = l;
    int *index = new int[l];

    double d, G, H;
    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double *beta = new double[l];
    double *QD = new double[l];
    double *y = prob->y;

    // L2R_L2LOSS_SVR_DUAL
    double lambda[1], upper_bound[1];
    lambda[0] = 0.5/C;
    upper_bound[0] = INF;

    if(solver_type == L2R_L1LOSS_SVR_DUAL)
    {
        lambda[0] = 0;
        upper_bound[0] = C;
    }

    // Initial beta can be set here. Note that
    // -upper_bound <= beta[i] <= upper_bound
    for(i=0; i<l; i++)
        beta[i] = 0;

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        feature_node * const xi = prob->x[i];
        QD[i] = sparse_operator::nrm2_sq(xi);
        sparse_operator::axpy(beta[i], xi, w);

        index[i] = i;
    }


    while(iter < max_iter)
    {
        Gmax_new = 0;
        Gnorm1_new = 0;

        for(i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            i = index[s];
            G = -y[i] + lambda[GETI(i)]*beta[i];
            H = QD[i] + lambda[GETI(i)];

            feature_node * const xi = prob->x[i];
            G += sparse_operator::dot(w, xi);

            double Gp = G+p;
            double Gn = G-p;
            double violation = 0;
            if(beta[i] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old && Gn<-Gmax_old)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(beta[i] >= upper_bound[GETI(i)])
            {
                if(Gp > 0)
                    violation = Gp;
                else if(Gp < -Gmax_old)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(beta[i] <= -upper_bound[GETI(i)])
            {
                if(Gn < 0)
                    violation = -Gn;
                else if(Gn > Gmax_old)
                {
                    active_size--;
                    swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(beta[i] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            if(Gp < H*beta[i])
                d = -Gp/H;
            else if(Gn > H*beta[i])
                d = -Gn/H;
            else
                d = -beta[i];

            if(fabs(d) < 1.0e-12)
                continue;

            double beta_old = beta[i];
            beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
            d = beta[i]-beta_old;

            if(d != 0)
                sparse_operator::axpy(d, xi, w);
        }

        if(iter == 0)
            Gnorm1_init = Gnorm1_new;
        iter++;
        if(iter % 10 == 0)
            info(".");

        if(Gnorm1_new <= eps*Gnorm1_init)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                info("*");
                Gmax_old = INF;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    info("\noptimization finished, #iter = %d\n", iter);

    // calculate objective value
    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    v = 0.5*v;
    for(i=0; i<l; i++)
    {
        v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
        if(beta[i] != 0)
            nSV++;
    }

    info("Objective value = %lf\n", v);
    info("nSV = %d\n",nSV);

    delete [] beta;
    delete [] QD;
    delete [] index;

    return iter;
}


// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
// min_\alpha 0.5(\alpha^T Q \alpha) + \sum_i [\alpha_i log(\alpha_i) + (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i)],
// s.t. 0 <= \alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
//       upper_bound_i = Cp if y_i = 1
//       upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
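
// Each coordinate solves (sketch)
//   min_z z*log(z) + (C-z)*log(C-z) + 0.5*a*(z-alpha_old)^2 + sign*b*(z-alpha_old)
// over 0 < z < C with a damped Newton method; see the inner loop below.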

static int solve_l2r_lr_dual(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300)
{
    int l = prob->l;
    int w_size = prob->n;
    double eps = param->eps;
    int i, s, iter = 0;
    double *xTx = new double[l];
    int *index = new int[l];
    double *alpha = new double[2*l]; // store alpha and C - alpha
    schar *y = new schar[l];
    int max_inner_iter = 100; // for inner Newton
    double innereps = 1e-2;
    double innereps_min = min(1e-8, eps);
    double upper_bound[3] = {Cn, 0, Cp};

    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
    }

    // Initial alpha can be set here. Note that
    // 0 < alpha[i] < upper_bound[GETI(i)]
    // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
    for(i=0; i<l; i++)
    {
        alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
        alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
    }

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        feature_node * const xi = prob->x[i];
        xTx[i] = sparse_operator::nrm2_sq(xi);
        sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
        index[i] = i;
    }

    while (iter < max_iter)
    {
        for (i=0; i<l; i++)
        {
            int j = i+rand()%(l-i);
            swap(index[i], index[j]);
        }
        int newton_iter = 0;
        double Gmax = 0;
        for (s=0; s<l; s++)
        {
            i = index[s];
            const schar yi = y[i];
            double C = upper_bound[GETI(i)];
            double ywTx = 0, xisq = xTx[i];
            feature_node * const xi = prob->x[i];
            ywTx = yi*sparse_operator::dot(w, xi);
            double a = xisq, b = ywTx;

            // Decide to minimize g_1(z) or g_2(z)
            int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
            if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
            {
                ind1 = 2*i+1;
                ind2 = 2*i;
                sign = -1;
            }

            // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
            double alpha_old = alpha[ind1];
            double z = alpha_old;
            if(C - z < 0.5 * C)
                z = 0.1*z;
            double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
            Gmax = max(Gmax, fabs(gp));

            // Newton method on the sub-problem
            const double eta = 0.1; // xi in the paper
            int inner_iter = 0;
            while (inner_iter <= max_inner_iter)
            {
                if(fabs(gp) < innereps)
                    break;
                double gpp = a + C/(C-z)/z;
                double tmpz = z - gp/gpp;
                if(tmpz <= 0)
                    z *= eta;
                else // tmpz in (0, C)
                    z = tmpz;
                gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
                newton_iter++;
                inner_iter++;
            }

            if(inner_iter > 0) // update w
            {
                alpha[ind1] = z;
                alpha[ind2] = C-z;
                sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
            }
        }

        iter++;
        if(iter % 10 == 0)
            info(".");

        if(Gmax < eps)
            break;

        if(newton_iter <= l/10)
            innereps = max(innereps_min, 0.1*innereps);

    }

    info("\noptimization finished, #iter = %d\n",iter);

    // calculate objective value

    double v = 0;
    for(i=0; i<w_size; i++)
        v += w[i] * w[i];
    v *= 0.5;
    for(i=0; i<l; i++)
        v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
            - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
    info("Objective value = %lf\n", v);

    delete [] xTx;
    delete [] alpha;
    delete [] y;
    delete [] index;

    return iter;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
// min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
//
// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
// must have been added to the original data. (see -B and -R option)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
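
// One-variable update (sketch): for the model |w_j+d| - |w_j| + G*d + 0.5*H*d^2
// with gradient G and Hessian estimate H at w_j, the minimizer is
//   d = -(G+1)/H if G+1 < H*w_j, d = -(G-1)/H if G-1 > H*w_j, d = -w_j otherwise,
// followed by a backtracking line search on the true objective.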

static int solve_l1r_l2_svc(const problem *prob_col, const parameter* param, double *w, double Cp, double Cn, double eps)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int regularize_bias = param->regularize_bias;
    int j, s, iter = 0;
    int max_iter = 1000;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double d_old, d_diff;
    double loss_old = 0, loss_new;
    double appxcond, cond;

    int *index = new int[w_size];
    schar *y = new schar[l];
    double *b = new double[l]; // b = 1-ywTx
    double *xj_sq = new double[w_size];
    feature_node *x;

    double C[3] = {Cn,0,Cp};

    // Initial w can be set here.
    for(j=0; j<w_size; j++)
        w[j] = 0;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        index[j] = j;
        xj_sq[j] = 0;
        x = prob_col->x[j];
        while(x->index != -1)
        {
            int ind = x->index-1;
            x->value *= y[ind]; // x->value stores yi*xij
            double val = x->value;
            b[ind] -= w[j]*val;
            xj_sq[j] += C[GETI(ind)]*val*val;
            x++;
        }
    }

    while(iter < max_iter)
    {
        Gmax_new = 0;
        Gnorm1_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            x = prob_col->x[j];
            while(x->index != -1)
            {
                int ind = x->index-1;
                if(b[ind] > 0)
                {
                    double val = x->value;
                    double tmp = C[GETI(ind)]*val;
                    G_loss -= tmp*b[ind];
                    H += tmp*val;
                }
                x++;
            }
            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = max(H, 1e-12);

            double violation = 0;
            double Gp = 0, Gn = 0;
            if(j == w_size-1 && regularize_bias == 0)
                violation = fabs(G);
            else
            {
                Gp = G+1;
                Gn = G-1;
                if(w[j] == 0)
                {
                    if(Gp < 0)
                        violation = -Gp;
                    else if(Gn > 0)
                        violation = Gn;
                    else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                    {
                        active_size--;
                        swap(index[s], index[active_size]);
                        s--;
                        continue;
                    }
                }
                else if(w[j] > 0)
                    violation = fabs(Gp);
                else
                    violation = fabs(Gn);
            }
            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            if(j == w_size-1 && regularize_bias == 0)
                d = -G/H;
            else
            {
                if(Gp < H*w[j])
                    d = -Gp/H;
                else if(Gn > H*w[j])
                    d = -Gn/H;
                else
                    d = -w[j];
            }

            if(fabs(d) < 1.0e-12)
                continue;

            double delta;
            if(j == w_size-1 && regularize_bias == 0)
                delta = G*d;
            else
                delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                if(j == w_size-1 && regularize_bias == 0)
                    cond = -sigma*delta;
                else
                    cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    x = prob_col->x[j];
                    sparse_operator::axpy(d_diff, x, b);
                    break;
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;
                    x = prob_col->x[j];
                    while(x->index != -1)
                    {
                        int ind = x->index-1;
                        if(b[ind] > 0)
                            loss_old += C[GETI(ind)]*b[ind]*b[ind];
                        double b_new = b[ind] + d_diff*x->value;
                        b[ind] = b_new;
                        if(b_new > 0)
                            loss_new += C[GETI(ind)]*b_new*b_new;
                        x++;
                    }
                }
                else
                {
                    loss_new = 0;
                    x = prob_col->x[j];
                    while(x->index != -1)
                    {
                        int ind = x->index-1;
                        double b_new = b[ind] + d_diff*x->value;
                        b[ind] = b_new;
                        if(b_new > 0)
                            loss_new += C[GETI(ind)]*b_new*b_new;
                        x++;
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                info("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<w_size; i++)
                {
                    if(w[i]==0) continue;
                    x = prob_col->x[i];
                    sparse_operator::axpy(-w[i], x, b);
                }
            }
        }

        if(iter == 0)
            Gnorm1_init = Gnorm1_new;
        iter++;
        if(iter % 10 == 0)
            info(".");

        if(Gnorm1_new <= eps*Gnorm1_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                info("*");
                Gmax_old = INF;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    info("\noptimization finished, #iter = %d\n", iter);
    if(iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        x = prob_col->x[j];
        while(x->index != -1)
        {
            x->value *= prob_col->y[x->index-1]; // restore x->value
            x++;
        }
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    if (regularize_bias == 0)
        v -= fabs(w[w_size-1]);
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    info("Objective value = %lf\n", v);
    info("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] b;
    delete [] xj_sq;

    return iter;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
//
// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
// must have been added to the original data. (see -B and -R option)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
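
// Structure of the solver (sketch): an outer Newton loop builds a quadratic
// + L1 model of the objective (Hdiag, Grad), an inner coordinate descent
// loop minimizes that model over wpd, and a line search on the true
// objective decides whether to accept the step.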

static int solve_l1r_lr(const problem *prob_col, const parameter *param, double *w, double Cp, double Cn, double eps)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int regularize_bias = param->regularize_bias;
    int j, s, newton_iter=0, iter=0;
    int max_newton_iter = 100;
    int max_iter = 1000;
    int max_num_linesearch = 20;
    int active_size;
    int QP_active_size;

    double nu = 1e-12;
    double inner_eps = 1;
    double sigma = 0.01;
    double w_norm, w_norm_new;
    double z, G, H;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double QP_Gmax_old = INF;
    double QP_Gmax_new, QP_Gnorm1_new;
    double delta, negsum_xTd, cond;

    int *index = new int[w_size];
    schar *y = new schar[l];
    double *Hdiag = new double[w_size];
    double *Grad = new double[w_size];
    double *wpd = new double[w_size];
    double *xjneg_sum = new double[w_size];
    double *xTd = new double[l];
    double *exp_wTx = new double[l];
    double *exp_wTx_new = new double[l];
    double *tau = new double[l];
    double *D = new double[l];
    feature_node *x;

    double C[3] = {Cn,0,Cp};

    // Initial w can be set here.
    for(j=0; j<w_size; j++)
        w[j] = 0;

    for(j=0; j<l; j++)
    {
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;

        exp_wTx[j] = 0;
    }

    w_norm = 0;
    for(j=0; j<w_size; j++)
    {
        w_norm += fabs(w[j]);
        wpd[j] = w[j];
        index[j] = j;
        xjneg_sum[j] = 0;
        x = prob_col->x[j];
        while(x->index != -1)
        {
            int ind = x->index-1;
            double val = x->value;
            exp_wTx[ind] += w[j]*val;
            if(y[ind] == -1)
                xjneg_sum[j] += C[GETI(ind)]*val;
            x++;
        }
    }
    if (regularize_bias == 0)
        w_norm -= fabs(w[w_size-1]);

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = exp(exp_wTx[j]);
        double tau_tmp = 1/(1+exp_wTx[j]);
        tau[j] = C[GETI(j)]*tau_tmp;
        D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
    }

    while(newton_iter < max_newton_iter)
    {
        Gmax_new = 0;
        Gnorm1_new = 0;
        active_size = w_size;

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            Hdiag[j] = nu;
            Grad[j] = 0;

            double tmp = 0;
            x = prob_col->x[j];
            while(x->index != -1)
            {
                int ind = x->index-1;
                Hdiag[j] += x->value*x->value*D[ind];
                tmp += x->value*tau[ind];
                x++;
            }
            Grad[j] = -tmp + xjneg_sum[j];

            double violation = 0;
            if (j == w_size-1 && regularize_bias == 0)
                violation = fabs(Grad[j]);
            else
            {
                double Gp = Grad[j]+1;
                double Gn = Grad[j]-1;
                if(w[j] == 0)
                {
                    if(Gp < 0)
                        violation = -Gp;
                    else if(Gn > 0)
                        violation = Gn;
                    //outer-level shrinking
                    else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                    {
                        active_size--;
                        swap(index[s], index[active_size]);
                        s--;
                        continue;
                    }
                }
                else if(w[j] > 0)
                    violation = fabs(Gp);
                else
                    violation = fabs(Gn);
            }
            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;
        }

        if(newton_iter == 0)
            Gnorm1_init = Gnorm1_new;

        if(Gnorm1_new <= eps*Gnorm1_init)
            break;

        iter = 0;
        QP_Gmax_old = INF;
        QP_active_size = active_size;

        for(int i=0; i<l; i++)
            xTd[i] = 0;

        // optimize QP over wpd
        while(iter < max_iter)
        {
            QP_Gmax_new = 0;
            QP_Gnorm1_new = 0;

            for(j=0; j<QP_active_size; j++)
            {
                int i = j+rand()%(QP_active_size-j);
                swap(index[i], index[j]);
            }

            for(s=0; s<QP_active_size; s++)
            {
                j = index[s];
                H = Hdiag[j];

                x = prob_col->x[j];
                G = Grad[j] + (wpd[j]-w[j])*nu;
                while(x->index != -1)
                {
                    int ind = x->index-1;
                    G += x->value*D[ind]*xTd[ind];
                    x++;
                }

                double violation = 0;
                if (j == w_size-1 && regularize_bias == 0)
                {
                    // bias term not shrunken
                    violation = fabs(G);
                    z = -G/H;
                }
                else
                {
                    double Gp = G+1;
                    double Gn = G-1;
                    if(wpd[j] == 0)
                    {
                        if(Gp < 0)
                            violation = -Gp;
                        else if(Gn > 0)
                            violation = Gn;
                        //inner-level shrinking
                        else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
                        {
                            QP_active_size--;
                            swap(index[s], index[QP_active_size]);
                            s--;
                            continue;
                        }
                    }
                    else if(wpd[j] > 0)
                        violation = fabs(Gp);
                    else
                        violation = fabs(Gn);

                    // obtain solution of one-variable problem
                    if(Gp < H*wpd[j])
                        z = -Gp/H;
                    else if(Gn > H*wpd[j])
                        z = -Gn/H;
                    else
                        z = -wpd[j];
                }
                QP_Gmax_new = max(QP_Gmax_new, violation);
                QP_Gnorm1_new += violation;

                if(fabs(z) < 1.0e-12)
                    continue;
                z = min(max(z,-10.0),10.0);

                wpd[j] += z;

                x = prob_col->x[j];
                sparse_operator::axpy(z, x, xTd);
            }

            iter++;

            if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
            {
                //inner stopping
                if(QP_active_size == active_size)
                    break;
                //active set reactivation
                else
                {
                    QP_active_size = active_size;
                    QP_Gmax_old = INF;
                    continue;
                }
            }

            QP_Gmax_old = QP_Gmax_new;
        }

        if(iter >= max_iter)
            info("WARNING: reaching max number of inner iterations\n");

        delta = 0;
        w_norm_new = 0;
        for(j=0; j<w_size; j++)
        {
            delta += Grad[j]*(wpd[j]-w[j]);
            if(wpd[j] != 0)
                w_norm_new += fabs(wpd[j]);
        }
        if (regularize_bias == 0)
            w_norm_new -= fabs(wpd[w_size-1]);
        delta += (w_norm_new-w_norm);

        negsum_xTd = 0;
        for(int i=0; i<l; i++)
            if(y[i] == -1)
                negsum_xTd += C[GETI(i)]*xTd[i];

        int num_linesearch;
        for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
        {
            cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;

            for(int i=0; i<l; i++)
            {
                double exp_xTd = exp(xTd[i]);
                exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
                cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
            }

            if(cond <= 0)
            {
                w_norm = w_norm_new;
                for(j=0; j<w_size; j++)
                    w[j] = wpd[j];
                for(int i=0; i<l; i++)
                {
                    exp_wTx[i] = exp_wTx_new[i];
                    double tau_tmp = 1/(1+exp_wTx[i]);
                    tau[i] = C[GETI(i)]*tau_tmp;
                    D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
                }
                break;
            }
            else
            {
                w_norm_new = 0;
                for(j=0; j<w_size; j++)
                {
                    wpd[j] = (w[j]+wpd[j])*0.5;
                    if(wpd[j] != 0)
                        w_norm_new += fabs(wpd[j]);
                }
                if (regularize_bias == 0)
                    w_norm_new -= fabs(wpd[w_size-1]);
                delta *= 0.5;
                negsum_xTd *= 0.5;
                for(int i=0; i<l; i++)
                    xTd[i] *= 0.5;
            }
        }

        // Recompute some info due to too many line search steps
        if(num_linesearch >= max_num_linesearch)
        {
            for(int i=0; i<l; i++)
                exp_wTx[i] = 0;

            for(int i=0; i<w_size; i++)
            {
                if(w[i]==0) continue;
                x = prob_col->x[i];
                sparse_operator::axpy(w[i], x, exp_wTx);
            }

            for(int i=0; i<l; i++)
                exp_wTx[i] = exp(exp_wTx[i]);
        }

        if(iter == 1)
            inner_eps *= 0.25;

        newton_iter++;
        Gmax_old = Gmax_new;

        info("iter %3d #CD cycles %d\n", newton_iter, iter);
    }

    info("=========================\n");
    info("optimization finished, #iter = %d\n", newton_iter);
    if(newton_iter >= max_newton_iter)
        info("WARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    if (regularize_bias == 0)
        v -= fabs(w[w_size-1]);
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    info("Objective value = %lf\n", v);
    info("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] Hdiag;
    delete [] Grad;
    delete [] wpd;
    delete [] xjneg_sum;
    delete [] xTd;
    delete [] exp_wTx;
    delete [] exp_wTx_new;
    delete [] tau;
    delete [] D;

    return newton_iter;
}

struct heap {
    enum HEAP_TYPE { MIN, MAX };
    int _size;
    HEAP_TYPE _type;
    feature_node* a;

    heap(int max_size, HEAP_TYPE type)
    {
        _size = 0;
        a = new feature_node[max_size];
        _type = type;
    }
    ~heap()
    {
        delete [] a;
    }
    bool cmp(const feature_node& left, const feature_node& right)
    {
        if(_type == MIN)
            return left.value > right.value;
        else
            return left.value < right.value;
    }
    int size()
    {
        return _size;
    }
    void push(feature_node node)
    {
        a[_size] = node;
        _size++;
        int i = _size-1;
        while(i)
        {
            int p = (i-1)/2;
            if(cmp(a[p], a[i]))
            {
                swap(a[i], a[p]);
                i = p;
            }
            else
                break;
        }
    }
    void pop()
    {
        _size--;
        a[0] = a[_size];
        int i = 0;
        while(i*2+1 < _size)
        {
            int l = i*2+1;
            int r = i*2+2;
            if(r < _size && cmp(a[l], a[r]))
                l = r;
            if(cmp(a[i], a[l]))
            {
                swap(a[i], a[l]);
                i = l;
            }
            else
                break;
        }
    }
    feature_node top()
    {
        return a[0];
    }
};
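
// Illustrative sketch (not part of the library): a MIN heap of capacity k
// keeps the k largest values seen, as Solve() below does:
//
//   heap h(2, heap::MIN);
//   feature_node n1 = {1, 3.0}, n2 = {2, 1.0}, n3 = {3, 5.0};
//   h.push(n1); h.push(n2);
//   if(h.top().value < n3.value) { h.pop(); h.push(n3); }
//   // h now holds {3.0, 5.0}; h.top() is the smaller of the two, 3.0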

// A two-level coordinate descent algorithm for
// a scaled one-class SVM dual problem
//
// min_\alpha 0.5(\alpha^T Q \alpha),
// s.t. 0 <= \alpha_i <= 1 and
// e^T \alpha = \nu l
//
// where Qij = xi^T xj
//
// Given:
// x, nu
// eps is the stopping tolerance
//
// solution will be put in w and rho
//
// this function returns the number of iterations
//
// See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.

static int solve_oneclass_svm(const problem *prob, const parameter *param, double *w, double *rho)
{
    int l = prob->l;
    int w_size = prob->n;
    double eps = param->eps;
    double nu = param->nu;
    int i, j, s, iter = 0;
    double Gi, Gj;
    double Qij, quad_coef, delta, sum;
    double old_alpha_i;
    double *QD = new double[l];
    double *G = new double[l];
    int *index = new int[l];
    double *alpha = new double[l];
    int max_inner_iter;
    int max_iter = 1000;
    int active_size = l;

    double negGmax; // max { -grad(f)_i | alpha_i < 1 }
    double negGmin; // min { -grad(f)_i | alpha_i > 0 }

    int *most_violating_i = new int[l];
    int *most_violating_j = new int[l];

    int n = (int)(nu*l); // # of alpha's at upper bound
    for(i=0; i<n; i++)
        alpha[i] = 1;
    if (n<l)
        alpha[i] = nu*l-n;
    for(i=n+1; i<l; i++)
        alpha[i] = 0;

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        feature_node * const xi = prob->x[i];
        QD[i] = sparse_operator::nrm2_sq(xi);
        sparse_operator::axpy(alpha[i], xi, w);

        index[i] = i;
    }

    while (iter < max_iter)
    {
        negGmax = -INF;
        negGmin = INF;

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            feature_node * const xi = prob->x[i];
            G[i] = sparse_operator::dot(w, xi);
            if (alpha[i] < 1)
                negGmax = max(negGmax, -G[i]);
            if (alpha[i] > 0)
                negGmin = min(negGmin, -G[i]);
        }

        if (negGmax - negGmin < eps)
        {
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                info("*");
                continue;
            }
        }

        for(s=0; s<active_size; s++)
        {
            i = index[s];
            if ((alpha[i] == 1 && -G[i] > negGmax) ||
                (alpha[i] == 0 && -G[i] < negGmin))
            {
                active_size--;
                swap(index[s], index[active_size]);
                s--;
            }
        }

        max_inner_iter = max(active_size/10, 1);
        struct heap min_heap = heap(max_inner_iter, heap::MIN);
        struct heap max_heap = heap(max_inner_iter, heap::MAX);
        struct feature_node node;
        for(s=0; s<active_size; s++)
        {
            i = index[s];
            node.index = i;
            node.value = -G[i];

            if (alpha[i] < 1)
            {
                if (min_heap.size() < max_inner_iter)
                    min_heap.push(node);
                else if (min_heap.top().value < node.value)
                {
                    min_heap.pop();
                    min_heap.push(node);
                }
            }

            if (alpha[i] > 0)
            {
                if (max_heap.size() < max_inner_iter)
                    max_heap.push(node);
                else if (max_heap.top().value > node.value)
                {
                    max_heap.pop();
                    max_heap.push(node);
                }
            }
        }
        max_inner_iter = min(min_heap.size(), max_heap.size());
        while (max_heap.size() > max_inner_iter)
            max_heap.pop();
        while (min_heap.size() > max_inner_iter)
            min_heap.pop();

        for (s=max_inner_iter-1; s>=0; s--)
        {
            most_violating_i[s] = min_heap.top().index;
            most_violating_j[s] = max_heap.top().index;
            min_heap.pop();
            max_heap.pop();
        }

        for (s=0; s<max_inner_iter; s++)
        {
            i = most_violating_i[s];
            j = most_violating_j[s];

            if ((alpha[i] == 0 && alpha[j] == 0) ||
                (alpha[i] == 1 && alpha[j] == 1))
                continue;

            feature_node const * xi = prob->x[i];
            feature_node const * xj = prob->x[j];

            Gi = sparse_operator::dot(w, xi);
            Gj = sparse_operator::dot(w, xj);

            int violating_pair = 0;
            if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
                violating_pair = 1;
            else
                if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
                    violating_pair = 1;
            if (violating_pair == 0)
                continue;

            Qij = sparse_operator::sparse_dot(xi, xj);
            quad_coef = QD[i] + QD[j] - 2*Qij;
            if(quad_coef <= 0)
                quad_coef = 1e-12;
            delta = (Gi - Gj) / quad_coef;
            old_alpha_i = alpha[i];
            sum = alpha[i] + alpha[j];
            alpha[i] = alpha[i] - delta;
            alpha[j] = alpha[j] + delta;
            if (sum > 1)
            {
                if (alpha[i] > 1)
                {
                    alpha[i] = 1;
                    alpha[j] = sum - 1;
                }
            }
            else
            {
                if (alpha[j] < 0)
                {
                    alpha[j] = 0;
                    alpha[i] = sum;
                }
            }
            if (sum > 1)
            {
                if (alpha[j] > 1)
                {
                    alpha[j] = 1;
                    alpha[i] = sum - 1;
                }
            }
            else
            {
                if (alpha[i] < 0)
                {
                    alpha[i] = 0;
                    alpha[j] = sum;
                }
            }
            delta = alpha[i] - old_alpha_i;
            sparse_operator::axpy(delta, xi, w);
            sparse_operator::axpy(-delta, xj, w);
        }
        iter++;
        if (iter % 10 == 0)
            info(".");
    }
    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\n\n");

2453 // calculate object value
2454 double v = 0;
2455 for(i=0; i<w_size; i++)
2456 v += w[i]*w[i];
2457 int nSV = 0;
2458 for(i=0; i<l; i++)
2459 {
2460 if (alpha[i] > 0)
2461 ++nSV;
2462 }
2463 info("Objective value = %lf\n", v/2);
2464 info("nSV = %d\n", nSV);
2465
2466 // calculate rho
2467 double nr_free = 0;
2468 double ub = INF, lb = -INF, sum_free = 0;
2469 for(i=0; i<l; i++)
2470 {
2471 double G = sparse_operator::dot(w, prob->x[i]);
2472 if (alpha[i] == 1)
2473 lb = max(lb, G);
2474 else if (alpha[i] == 0)
2475 ub = min(ub, G);
2476 else
2477 {
2478 ++nr_free;
2479 sum_free += G;
2480 }
2481 }
2482
2483 if (nr_free > 0)
2484 *rho = sum_free/nr_free;
2485 else
2486 *rho = (ub + lb)/2;
2487
2488 info("rho = %lf\n", *rho);
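	// Added note: by the KKT conditions every free variable
	// (0 < alpha[i] < 1) satisfies w^T x_i = rho at optimality, so rho is
	// estimated as the mean gradient over the free set; when no variable is
	// free, any value between the bounds induced by the variables at 1 (lb)
	// and at 0 (ub) is optimal, and their midpoint is used.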

	delete [] QD;
	delete [] G;
	delete [] index;
	delete [] alpha;
	delete [] most_violating_i;
	delete [] most_violating_j;

	return iter;
}

// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	size_t nnz = 0;
	size_t *col_ptr = new size_t [n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new double[l];
	prob_col->x = new feature_node*[n];

	for(i=0; i<l; i++)
		prob_col->y[i] = prob->y[i];

	for(i=0; i<n+1; i++)
		col_ptr[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			nnz++;
			col_ptr[x->index]++;
			x++;
		}
	}
	for(i=1; i<n+1; i++)
		col_ptr[i] += col_ptr[i-1] + 1;
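	// Added note: the "+ 1" in the prefix sum above reserves one extra slot
	// per column for its index == -1 terminator, which is why x_space is
	// allocated with nnz+n entries below.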

	x_space = new feature_node[nnz+n];
	for(i=0; i<n; i++)
		prob_col->x[i] = &x_space[col_ptr[i]];

	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x_space[col_ptr[ind]].index = i+1; // starts from 1
			x_space[col_ptr[ind]].value = x->value;
			col_ptr[ind]++;
			x++;
		}
	}
	for(i=0; i<n; i++)
		x_space[col_ptr[i]].index = -1;

	*x_space_ret = x_space;

	delete [] col_ptr;
}

// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = (int)prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	//
	// Labels are ordered by their first occurrence in the training set.
	// However, for two-class sets with -1/+1 labels where -1 appears first,
	// we swap the labels so that internally the binary SVM treats the +1 instances as the positive class.
	//
	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
	{
		swap(label[0],label[1]);
		swap(count[0],count[1]);
		for(i=0;i<l;i++)
		{
			if(data_label[i] == 0)
				data_label[i] = 1;
			else
				data_label[i] = 0;
		}
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}
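// Added illustration (hypothetical data, not from the original source): for
// y = {3, 1, 3, 2} the outputs are nr_class = 3, label = {3, 1, 2},
// count = {2, 1, 1}, start = {0, 2, 3}, and perm = {0, 2, 1, 3}, so the
// instances of class k are perm[start[k]] .. perm[start[k]+count[k]-1].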

static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	int solver_type = param->solver_type;
	int dual_solver_max_iter = 300;
	int iter;

	bool is_regression = (solver_type==L2R_L2LOSS_SVR ||
				solver_type==L2R_L1LOSS_SVR_DUAL ||
				solver_type==L2R_L2LOSS_SVR_DUAL);

	// Some solvers take Cp,Cn rather than the per-instance C array; extending them is possible, but there is no plan for now
	double *C = new double[prob->l];
	double primal_solver_tol = param->eps;
	if(is_regression)
	{
		for(int i=0;i<prob->l;i++)
			C[i] = param->C;
	}
	else
	{
		int pos = 0;
		for(int i=0;i<prob->l;i++)
		{
			if(prob->y[i] > 0)
			{
				pos++;
				C[i] = Cp;
			}
			else
				C[i] = Cn;
		}
		int neg = prob->l - pos;
		primal_solver_tol = param->eps*max(min(pos,neg), 1)/prob->l;
	}
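	// Added note: for classification the primal stopping tolerance is scaled
	// by min(#pos,#neg)/l, so the more unbalanced the classes, the tighter
	// the tolerance passed to the primal solvers below.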

	switch(solver_type)
	{
		case L2R_LR:
		{
			l2r_lr_fun fun_obj(prob, param, C);
			NEWTON newton_obj(&fun_obj, primal_solver_tol);
			newton_obj.set_print_string(liblinear_print_string);
			newton_obj.newton(w);
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			l2r_l2_svc_fun fun_obj(prob, param, C);
			NEWTON newton_obj(&fun_obj, primal_solver_tol);
			newton_obj.set_print_string(liblinear_print_string);
			newton_obj.newton(w);
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
		{
			iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
			{
				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 2\n\n");
				// primal_solver_tol obtained from eps for dual may be too loose
				primal_solver_tol *= 0.1;
				l2r_l2_svc_fun fun_obj(prob, param, C);
				NEWTON newton_obj(&fun_obj, primal_solver_tol);
				newton_obj.set_print_string(liblinear_print_string);
				newton_obj.newton(w);
			}
			break;
		}
		case L2R_L1LOSS_SVC_DUAL:
		{
			iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
				info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
			break;
		}
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space, &prob_col);
			solve_l1r_l2_svc(&prob_col, param, w, Cp, Cn, primal_solver_tol);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space, &prob_col);
			solve_l1r_lr(&prob_col, param, w, Cp, Cn, primal_solver_tol);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L2R_LR_DUAL:
		{
			iter = solve_l2r_lr_dual(prob, param, w, Cp, Cn, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
			{
				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 0\n\n");
				// primal_solver_tol obtained from eps for dual may be too loose
				primal_solver_tol *= 0.1;
				l2r_lr_fun fun_obj(prob, param, C);
				NEWTON newton_obj(&fun_obj, primal_solver_tol);
				newton_obj.set_print_string(liblinear_print_string);
				newton_obj.newton(w);
			}
			break;
		}
		case L2R_L2LOSS_SVR:
		{
			l2r_l2_svr_fun fun_obj(prob, param, C);
			NEWTON newton_obj(&fun_obj, primal_solver_tol);
			newton_obj.set_print_string(liblinear_print_string);
			newton_obj.newton(w);
			break;
		}
		case L2R_L1LOSS_SVR_DUAL:
		{
			iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
				info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster (also see FAQ)\n\n");

			break;
		}
		case L2R_L2LOSS_SVR_DUAL:
		{
			iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
			{
				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 11\n\n");
				// primal_solver_tol obtained from eps for dual may be too loose
				primal_solver_tol *= 0.001;
				l2r_l2_svr_fun fun_obj(prob, param, C);
				NEWTON newton_obj(&fun_obj, primal_solver_tol);
				newton_obj.set_print_string(liblinear_print_string);
				newton_obj.newton(w);
			}
			break;
		}
		default:
			fprintf(stderr, "ERROR: unknown solver_type\n");
			break;
	}

	delete[] C;
}

// Calculate the initial C for parameter selection
static double calc_start_C(const problem *prob, const parameter *param)
{
	int i;
	double xTx, max_xTx;
	max_xTx = 0;
	for(i=0; i<prob->l; i++)
	{
		xTx = 0;
		feature_node *xi=prob->x[i];
		while(xi->index != -1)
		{
			double val = xi->value;
			xTx += val*val;
			xi++;
		}
		if(xTx > max_xTx)
			max_xTx = xTx;
	}

	double min_C = 1.0;
	if(param->solver_type == L2R_LR)
		min_C = 1.0 / (prob->l * max_xTx);
	else if(param->solver_type == L2R_L2LOSS_SVC)
		min_C = 1.0 / (2 * prob->l * max_xTx);
	else if(param->solver_type == L2R_L2LOSS_SVR)
	{
		double sum_y, loss, y_abs;
		double delta2 = 0.1;
		sum_y = 0, loss = 0;
		for(i=0; i<prob->l; i++)
		{
			y_abs = fabs(prob->y[i]);
			sum_y += y_abs;
			loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
		}
		if(loss > 0)
			min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
		else
			min_C = INF;
	}

	return pow( 2, floor(log(min_C) / log(2.0)) );
}
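// Added note (a sketch of the rationale, not an original comment): min_C
// estimates the smallest C worth searching; below it the regularization term
// dominates and the cross-validated model is essentially unchanged, so the
// parameter search can safely start there. Rounding down to a power of two
// keeps the doubling search grid aligned with integer log2(C) values.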

static double calc_max_p(const problem *prob)
{
	int i;
	double max_p = 0.0;
	for(i = 0; i < prob->l; i++)
		max_p = max(max_p, fabs(prob->y[i]));

	return max_p;
}

static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
{
	// variables for CV
	int i;
	double *target = Malloc(double, prob->l);

	// variables for warm start
	double ratio = 2;
	double **prev_w = Malloc(double*, nr_fold);
	for(i = 0; i < nr_fold; i++)
		prev_w[i] = NULL;
	int num_unchanged_w = 0;
	void (*default_print_string) (const char *) = liblinear_print_string;

	if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
		*best_score = 0.0;
	else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
		*best_score = INF;
	*best_C = start_C;

	param_tmp->C = start_C;
	while(param_tmp->C <= max_C)
	{
		// Output disabled for running CV at a particular C
		set_print_string_function(&print_null);

		for(i=0; i<nr_fold; i++)
		{
			int j;
			int begin = fold_start[i];
			int end = fold_start[i+1];

			param_tmp->init_sol = prev_w[i];
			struct model *submodel = train(&subprob[i],param_tmp);

			int total_w_size;
			if(submodel->nr_class == 2)
				total_w_size = subprob[i].n;
			else
				total_w_size = subprob[i].n * submodel->nr_class;

			if(prev_w[i] == NULL)
			{
				prev_w[i] = Malloc(double, total_w_size);
				for(j=0; j<total_w_size; j++)
					prev_w[i][j] = submodel->w[j];
			}
			else if(num_unchanged_w >= 0)
			{
				double norm_w_diff = 0;
				for(j=0; j<total_w_size; j++)
				{
					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
					prev_w[i][j] = submodel->w[j];
				}
				norm_w_diff = sqrt(norm_w_diff);

				if(norm_w_diff > 1e-15)
					num_unchanged_w = -1;
			}
			else
			{
				for(j=0; j<total_w_size; j++)
					prev_w[i][j] = submodel->w[j];
			}

			for(j=begin; j<end; j++)
				target[perm[j]] = predict(submodel,prob->x[perm[j]]);

			free_and_destroy_model(&submodel);
		}
		set_print_string_function(default_print_string);

		if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
		{
			int total_correct = 0;
			for(i=0; i<prob->l; i++)
				if(target[i] == prob->y[i])
					++total_correct;
			double current_rate = (double)total_correct/prob->l;
			if(current_rate > *best_score)
			{
				*best_C = param_tmp->C;
				*best_score = current_rate;
			}

			info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
		}
		else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
		{
			double total_error = 0.0;
			for(i=0; i<prob->l; i++)
			{
				double y = prob->y[i];
				double v = target[i];
				total_error += (v-y)*(v-y);
			}
			double current_error = total_error/prob->l;
			if(current_error < *best_score)
			{
				*best_C = param_tmp->C;
				*best_score = current_error;
			}

			info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
		}

		num_unchanged_w++;
		if(num_unchanged_w == 5)
			break;
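		// Added note: each fold is warm-started from its solution at the
		// previous C (via param_tmp->init_sol). If no fold's w moved by more
		// than 1e-15 for five consecutive C values, larger C is assumed to
		// reproduce the same CV models and the search stops early.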
		param_tmp->C = param_tmp->C*ratio;
	}

	if(param_tmp->C > max_C)
		info("WARNING: maximum C reached.\n");
	free(target);
	for(i=0; i<nr_fold; i++)
		free(prev_w[i]);
	free(prev_w);
}


//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
{
	int i,j;
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	if(prob->bias>=0)
		model_->nr_feature=n-1;
	else
		model_->nr_feature=n;
	model_->param = *param;
	model_->bias = prob->bias;

	if(check_regression_model(model_))
	{
		model_->w = Malloc(double, w_size);

		if(param->init_sol != NULL)
			for(i=0;i<w_size;i++)
				model_->w[i] = param->init_sol[i];
		else
			for(i=0;i<w_size;i++)
				model_->w[i] = 0;

		model_->nr_class = 2;
		model_->label = NULL;
		train_one(prob, param, model_->w, 0, 0);
	}
	else if(check_oneclass_model(model_))
	{
		model_->w = Malloc(double, w_size);
		model_->nr_class = 2;
		model_->label = NULL;
		solve_oneclass_svm(prob, param, model_->w, &(model_->rho));
	}
	else
	{
		int nr_class;
		int *label = NULL;
		int *start = NULL;
		int *count = NULL;
		int *perm = Malloc(int,l);

		// group training data of the same class
		group_classes(prob,&nr_class,&label,&start,&count,perm);

		model_->nr_class=nr_class;
		model_->label = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			model_->label[i] = label[i];

		// calculate weighted C
		double *weighted_C = Malloc(double, nr_class);
		for(i=0;i<nr_class;i++)
			weighted_C[i] = param->C;
		for(i=0;i<param->nr_weight;i++)
		{
			for(j=0;j<nr_class;j++)
				if(param->weight_label[i] == label[j])
					break;
			if(j == nr_class)
				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
			else
				weighted_C[j] *= param->weight[i];
		}

		// constructing the subproblem
		feature_node **x = Malloc(feature_node *,l);
		for(i=0;i<l;i++)
			x[i] = prob->x[perm[i]];

		int k;
		problem sub_prob;
		sub_prob.l = l;
		sub_prob.n = n;
		sub_prob.x = Malloc(feature_node *,sub_prob.l);
		sub_prob.y = Malloc(double,sub_prob.l);

		for(k=0; k<sub_prob.l; k++)
			sub_prob.x[k] = x[k];

		// multi-class svm by Crammer and Singer
		if(param->solver_type == MCSVM_CS)
		{
			model_->w=Malloc(double, n*nr_class);
			for(i=0;i<nr_class;i++)
				for(j=start[i];j<start[i]+count[i];j++)
					sub_prob.y[j] = i;
			Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
			Solver.Solve(model_->w);
		}
		else
		{
			if(nr_class == 2)
			{
				model_->w=Malloc(double, w_size);

				int e0 = start[0]+count[0];
				k=0;
				for(; k<e0; k++)
					sub_prob.y[k] = +1;
				for(; k<sub_prob.l; k++)
					sub_prob.y[k] = -1;

				if(param->init_sol != NULL)
					for(i=0;i<w_size;i++)
						model_->w[i] = param->init_sol[i];
				else
					for(i=0;i<w_size;i++)
						model_->w[i] = 0;

				train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
			}
			else
			{
				model_->w=Malloc(double, w_size*nr_class);
				double *w=Malloc(double, w_size);
				for(i=0;i<nr_class;i++)
				{
					int si = start[i];
					int ei = si+count[i];

					k=0;
					for(; k<si; k++)
						sub_prob.y[k] = -1;
					for(; k<ei; k++)
						sub_prob.y[k] = +1;
					for(; k<sub_prob.l; k++)
						sub_prob.y[k] = -1;

					if(param->init_sol != NULL)
						for(j=0;j<w_size;j++)
							w[j] = param->init_sol[j*nr_class+i];
					else
						for(j=0;j<w_size;j++)
							w[j] = 0;

					train_one(&sub_prob, param, w, weighted_C[i], param->C);

					for(j=0;j<w_size;j++)
						model_->w[j*nr_class+i] = w[j];
				}
				free(w);
			}

		}

		free(x);
		free(label);
		free(start);
		free(count);
		free(perm);
		free(sub_prob.x);
		free(sub_prob.y);
		free(weighted_C);
	}
	return model_;
}
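// Minimal usage sketch (added illustration; the tiny 2-instance problem and
// all values below are hypothetical, not from the original source):
//
//	struct feature_node x1[] = {{1,+1.0},{-1,0}}, x2[] = {{1,-1.0},{-1,0}};
//	struct feature_node *rows[] = {x1, x2};
//	double y[] = {+1, -1};
//	struct problem prob;
//	prob.l = 2; prob.n = 1; prob.y = y; prob.x = rows; prob.bias = -1;
//	struct parameter param;
//	param.solver_type = L2R_L2LOSS_SVC;
//	param.eps = 0.01; param.C = 1; param.p = 0.1; param.nu = 0.5;
//	param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
//	param.init_sol = NULL; param.regularize_bias = 1;
//	if (check_parameter(&prob, &param) == NULL)
//	{
//		struct model *m = train(&prob, &param);
//		double pred = predict(m, x1);
//		free_and_destroy_model(&m);
//	}
//
// For the multi-class branch above, w is stored feature-major: the weight of
// feature j for class i is w[j*nr_class+i].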

void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
{
	int i;
	int *fold_start;
	int l = prob->l;
	int *perm = Malloc(int,l);
	if (nr_fold > l)
	{
		nr_fold = l;
		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
	}
	fold_start = Malloc(int,nr_fold+1);
	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct problem subprob;

		subprob.bias = prob->bias;
		subprob.n = prob->n;
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct feature_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct model *submodel = train(&subprob,param);
		for(j=begin;j<end;j++)
			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}
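// Added note: on return, target[i] is the label predicted for instance i by
// the model trained on the folds not containing i, so a CV accuracy is just
//
//	int correct = 0;
//	for(i=0; i<prob->l; i++)
//		if(target[i] == prob->y[i]) ++correct;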


void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
{
	// prepare CV folds

	int i;
	int *fold_start;
	int l = prob->l;
	int *perm = Malloc(int, l);
	struct problem *subprob = Malloc(problem,nr_fold);

	if (nr_fold > l)
	{
		nr_fold = l;
		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
	}
	fold_start = Malloc(int,nr_fold+1);
	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;

		subprob[i].bias = prob->bias;
		subprob[i].n = prob->n;
		subprob[i].l = l-(end-begin);
		subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
		subprob[i].y = Malloc(double,subprob[i].l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob[i].x[k] = prob->x[perm[j]];
			subprob[i].y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob[i].x[k] = prob->x[perm[j]];
			subprob[i].y[k] = prob->y[perm[j]];
			++k;
		}
	}

	struct parameter param_tmp = *param;
	*best_p = -1;
	if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
	{
		if(start_C <= 0)
			start_C = calc_start_C(prob, &param_tmp);
		double max_C = 1024;
		start_C = min(start_C, max_C);
		double best_C_tmp, best_score_tmp;

		find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);

		*best_C = best_C_tmp;
		*best_score = best_score_tmp;
	}
	else if(param->solver_type == L2R_L2LOSS_SVR)
	{
		double max_p = calc_max_p(prob);
		int num_p_steps = 20;
		double max_C = 1048576;
		*best_score = INF;

		i = num_p_steps-1;
		if(start_p > 0)
			i = min((int)(start_p/(max_p/num_p_steps)), i);
		for(; i >= 0; i--)
		{
			param_tmp.p = i*max_p/num_p_steps;
			double start_C_tmp;
			if(start_C <= 0)
				start_C_tmp = calc_start_C(prob, &param_tmp);
			else
				start_C_tmp = start_C;
			start_C_tmp = min(start_C_tmp, max_C);
			double best_C_tmp, best_score_tmp;

			find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);

			if(best_score_tmp < *best_score)
			{
				*best_p = param_tmp.p;
				*best_C = best_C_tmp;
				*best_score = best_score_tmp;
			}
		}
	}

	free(fold_start);
	free(perm);
	for(i=0; i<nr_fold; i++)
	{
		free(subprob[i].x);
		free(subprob[i].y);
	}
	free(subprob);
}

double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
	int idx;
	int n;
	if(model_->bias>=0)
		n=model_->nr_feature+1;
	else
		n=model_->nr_feature;
	double *w=model_->w;
	int nr_class=model_->nr_class;
	int i;
	int nr_w;
	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	const feature_node *lx=x;
	for(i=0;i<nr_w;i++)
		dec_values[i] = 0;
	for(; (idx=lx->index)!=-1; lx++)
	{
		// the dimension of testing data may exceed that of training
		if(idx<=n)
			for(i=0;i<nr_w;i++)
				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
	}
	if(check_oneclass_model(model_))
		dec_values[0] -= model_->rho;

	if(nr_class==2)
	{
		if(check_regression_model(model_))
			return dec_values[0];
		else if(check_oneclass_model(model_))
			return (dec_values[0]>0)?1:-1;
		else
			return (dec_values[0]>0)?model_->label[0]:model_->label[1];
	}
	else
	{
		int dec_max_idx = 0;
		for(i=1;i<nr_class;i++)
		{
			if(dec_values[i] > dec_values[dec_max_idx])
				dec_max_idx = i;
		}
		return model_->label[dec_max_idx];
	}
}
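// Added note: dec_values must provide room for nr_class entries; for binary
// non-MCSVM_CS models only dec_values[0] is written and its sign chooses
// between label[0] and label[1]. Regression models return the raw value
// w^T x, and one-class models return +1/-1 from the sign of w^T x - rho.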

double predict(const model *model_, const feature_node *x)
{
	double *dec_values = Malloc(double, model_->nr_class);
	double label=predict_values(model_, x, dec_values);
	free(dec_values);
	return label;
}

double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
	if(check_probability_model(model_))
	{
		int i;
		int nr_class=model_->nr_class;
		int nr_w;
		if(nr_class==2)
			nr_w = 1;
		else
			nr_w = nr_class;

		double label=predict_values(model_, x, prob_estimates);
		for(i=0;i<nr_w;i++)
			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

		if(nr_class==2) // for binary classification
			prob_estimates[1]=1.-prob_estimates[0];
		else
		{
			double sum=0;
			for(i=0; i<nr_class; i++)
				sum+=prob_estimates[i];

			for(i=0; i<nr_class; i++)
				prob_estimates[i]=prob_estimates[i]/sum;
		}

		return label;
	}
	else
		return 0;
}
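// Added note: probability outputs exist only for the logistic regression
// solvers (see check_probability_model below). Binary models apply the
// sigmoid 1/(1+exp(-w^T x)) to the single decision value; multi-class
// models apply it to every one-vs-rest value and renormalize to sum to 1,
// a heuristic combination rather than a joint probability model.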

static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
	"", "", "",
	"L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL",
	"", "", "", "", "", "", "",
	"ONECLASS_SVM", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
	int i;
	int nr_feature=model_->nr_feature;
	int n;
	const parameter& param = model_->param;

	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	char *old_locale = setlocale(LC_ALL, NULL);
	if (old_locale)
	{
		old_locale = strdup(old_locale);
	}
	setlocale(LC_ALL, "C");

	int nr_w;
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
	fprintf(fp, "nr_class %d\n", model_->nr_class);

	if(model_->label)
	{
		fprintf(fp, "label");
		for(i=0; i<model_->nr_class; i++)
			fprintf(fp, " %d", model_->label[i]);
		fprintf(fp, "\n");
	}

	fprintf(fp, "nr_feature %d\n", nr_feature);

	fprintf(fp, "bias %.17g\n", model_->bias);

	if(check_oneclass_model(model_))
		fprintf(fp, "rho %.17g\n", model_->rho);

	fprintf(fp, "w\n");
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
		fprintf(fp, "\n");
	}

	setlocale(LC_ALL, old_locale);
	free(old_locale);

	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	else return 0;
}
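// Added illustration: the file written above is a short text header followed
// by the weight matrix, one feature per line with nr_w values each. A
// hypothetical binary model with 3 features could look like
//
//	solver_type L2R_L2LOSS_SVC
//	nr_class 2
//	label 1 -1
//	nr_feature 3
//	bias -1
//	w
//	0.25
//	-1.5
//	0.75
//
// The locale is forced to "C" so %.17g always uses '.' as the decimal
// separator and doubles round-trip exactly through load_model below.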

//
// FSCANF helps to handle fscanf failures.
// Its do-while block avoids the ambiguity when
//	if (...)
//		FSCANF();
// is used
//
#define FSCANF(_stream, _format, _var)do\
{\
	if (fscanf(_stream, _format, _var) != 1)\
	{\
		fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
		EXIT_LOAD_MODEL()\
	}\
}while(0)
// EXIT_LOAD_MODEL should NOT end with a semicolon.
#define EXIT_LOAD_MODEL()\
{\
	setlocale(LC_ALL, old_locale);\
	free(model_->label);\
	free(model_);\
	free(old_locale);\
	return NULL;\
}
struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	double rho;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;
	// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.init_sol = NULL;

	model_->label = NULL;

	char *old_locale = setlocale(LC_ALL, NULL);
	if (old_locale)
	{
		old_locale = strdup(old_locale);
	}
	setlocale(LC_ALL, "C");

	char cmd[81];
	while(1)
	{
		FSCANF(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			FSCANF(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				EXIT_LOAD_MODEL()
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			FSCANF(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			FSCANF(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			FSCANF(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"rho")==0)
		{
			FSCANF(fp,"%lf",&rho);
			model_->rho=rho;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				FSCANF(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			EXIT_LOAD_MODEL()
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if(nr_class==2 && param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
	}

	setlocale(LC_ALL, old_locale);
	free(old_locale);

	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

	return model_;
}

int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}

// use inline here for better performance (around 20% faster than the non-inline one)
static inline double get_w_value(const struct model *model_, int idx, int label_idx)
{
	int nr_class = model_->nr_class;
	int solver_type = model_->param.solver_type;
	const double *w = model_->w;

	if(idx < 0 || idx > model_->nr_feature)
		return 0;
	if(check_regression_model(model_) || check_oneclass_model(model_))
		return w[idx];
	else
	{
		if(label_idx < 0 || label_idx >= nr_class)
			return 0;
		if(nr_class == 2 && solver_type != MCSVM_CS)
		{
			if(label_idx == 0)
				return w[idx];
			else
				return -w[idx];
		}
		else
			return w[idx*nr_class+label_idx];
	}
}

// feat_idx: starting from 1 to nr_feature
// label_idx: starting from 0 to nr_class-1 for classification models;
//            for regression and one-class SVM models, label_idx is
//            ignored.
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
{
	if(feat_idx > model_->nr_feature)
		return 0;
	return get_w_value(model_, feat_idx-1, label_idx);
}

double get_decfun_bias(const struct model *model_, int label_idx)
{
	if(check_oneclass_model(model_))
	{
		fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n");
		return 0;
	}
	int bias_idx = model_->nr_feature;
	double bias = model_->bias;
	if(bias <= 0)
		return 0;
	else
		return bias*get_w_value(model_, bias_idx, label_idx);
}
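// Added illustration (a sketch, not part of the original API): the decision
// value of class k on a dense vector x can be rebuilt from the accessors
// above as
//
//	double decfun_value(const struct model *m, const double *x, int k)
//	{
//		double v = get_decfun_bias(m, k);
//		for(int i=1; i<=get_nr_feature(m); i++)	// feat_idx is 1-based
//			v += get_decfun_coef(m, i, k) * x[i-1];
//		return v;
//	}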

double get_decfun_rho(const struct model *model_)
{
	if(check_oneclass_model(model_))
		return model_->rho;
	else
	{
		fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n");
		return 0;
	}
}

void free_model_content(struct model *model_ptr)
{
	if(model_ptr->w != NULL)
		free(model_ptr->w);
	if(model_ptr->label != NULL)
		free(model_ptr->label);
}

void free_and_destroy_model(struct model **model_ptr_ptr)
{
	struct model *model_ptr = *model_ptr_ptr;
	if(model_ptr != NULL)
	{
		free_model_content(model_ptr);
		free(model_ptr);
	}
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
	if(param->init_sol != NULL)
		free(param->init_sol);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->p < 0)
		return "p < 0";

	if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
		return "prob->bias >=0, but this is ignored in ONECLASS_SVM";

	if(param->regularize_bias == 0)
	{
		if(prob->bias != 1.0)
			return "To not regularize bias, must specify -B 1 along with -R";
		if(param->solver_type != L2R_LR
			&& param->solver_type != L2R_L2LOSS_SVC
			&& param->solver_type != L1R_L2LOSS_SVC
			&& param->solver_type != L1R_LR
			&& param->solver_type != L2R_L2LOSS_SVR)
			return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR";
	}

	if(param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
		&& param->solver_type != MCSVM_CS
		&& param->solver_type != L1R_L2LOSS_SVC
		&& param->solver_type != L1R_LR
		&& param->solver_type != L2R_LR_DUAL
		&& param->solver_type != L2R_L2LOSS_SVR
		&& param->solver_type != L2R_L2LOSS_SVR_DUAL
		&& param->solver_type != L2R_L1LOSS_SVR_DUAL
		&& param->solver_type != ONECLASS_SVM)
		return "unknown solver type";

	if(param->init_sol != NULL
		&& param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L2LOSS_SVR)
		return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";

	return NULL;
}
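// Added note: check_parameter returns NULL when the (prob, param) pair is
// usable and a static error string otherwise, so the intended call pattern is
//
//	const char *err = check_parameter(&prob, &param);
//	if(err) { fprintf(stderr, "ERROR: %s\n", err); exit(1); }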

int check_probability_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_LR ||
		model_->param.solver_type==L2R_LR_DUAL ||
		model_->param.solver_type==L1R_LR);
}

int check_regression_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_L2LOSS_SVR ||
		model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
		model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
}

int check_oneclass_model(const struct model *model_)
{
	return model_->param.solver_type == ONECLASS_SVM;
}

void set_print_string_function(void (*print_func)(const char*))
{
	if (print_func == NULL)
		liblinear_print_string = &print_string_stdout;
	else
		liblinear_print_string = print_func;
}