/*
This code was extracted from liblinear-2.2.1 in Feb 2019 and
modified for use with Octave and Matlab.

Copyright (c) 2007-2019 The LIBLINEAR Project.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither name of copyright holders nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.


THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <locale.h>
#include "linear.h"
#include "tron.h"
int liblinear_version = LIBLINEAR_VERSION;
typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

static void print_string_stdout(const char *s)
{
	fputs(s,stdout);
	fflush(stdout);
}
static void print_null(const char*) {}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsprintf(buf,fmt,ap);
	va_end(ap);
	(*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif
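
// Feature vectors use the sparse feature_node format declared in
// linear.h: an array of (index, value) pairs with 1-based indices,
// terminated by a node whose index is -1. For example, the dense
// vector [0.5, 0, 2.0] is stored as {(1,0.5), (3,2.0), (-1,-)}.
// The helpers below all walk such arrays.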
class sparse_operator
{
public:
	static double nrm2_sq(const feature_node *x)
	{
		double ret = 0;
		while(x->index != -1)
		{
			ret += x->value*x->value;
			x++;
		}
		return (ret);
	}

	static double dot(const double *s, const feature_node *x)
	{
		double ret = 0;
		while(x->index != -1)
		{
			ret += s[x->index-1]*x->value;
			x++;
		}
		return (ret);
	}

	static void axpy(const double a, const feature_node *x, double *y)
	{
		while(x->index != -1)
		{
			y[x->index-1] += a*x->value;
			x++;
		}
	}
};

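// L2-regularized logistic regression in the primal (a summary of what
// l2r_lr_fun computes for the TRON solver):
//
//	f(w)  = 0.5 w^T w + \sum_i C_i log(1 + exp(-y_i w^T x_i))
//	grad  = w + X^T z,  z_i = C_i (sigma_i - 1) y_i,  sigma_i = 1/(1+exp(-y_i w^T x_i))
//	Hv(s) = s + X^T (C D X s),  D_ii = sigma_i (1 - sigma_i)
//
// fun() splits on the sign of y_i w^T x_i so that exp() is only ever
// called on a non-positive argument and cannot overflow.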
class l2r_lr_fun: public function
{
public:
	l2r_lr_fun(const problem *prob, double *C);
	~l2r_lr_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);
	void get_diag_preconditioner(double *M);

private:
	void Xv(double *v, double *Xv);
	void XTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	const problem *prob;
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
{
	int l=prob->l;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	this->C = C;
}

l2r_lr_fun::~l2r_lr_fun()
{
	delete[] z;
	delete[] D;
}


double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}

	return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_lr_fun::get_diag_preconditioner(double *M)
{
	int i;
	int l = prob->l;
	int w_size=get_nr_variable();
	feature_node **x = prob->x;

	for (i=0; i<w_size; i++)
		M[i] = 1;

	for (i=0; i<l; i++)
	{
		feature_node *s = x[i];
		while (s->index!=-1)
		{
			M[s->index-1] += s->value*s->value*C[i]*D[i];
			s++;
		}
	}
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		Hs[i] = 0;
	for(i=0;i<l;i++)
	{
		feature_node * const xi=x[i];
		double xTs = sparse_operator::dot(s, xi);

		xTs = C[i]*D[i]*xTs;

		sparse_operator::axpy(xTs, xi, Hs);
	}
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + Hs[i];
}

void l2r_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
		Xv[i]=sparse_operator::dot(v, x[i]);
}

void l2r_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
		sparse_operator::axpy(v[i], x[i], XTv);
}

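// L2-regularized L2-loss (squared hinge) SVC in the primal:
//
//	f(w) = 0.5 w^T w + \sum_i C_i max(0, 1 - y_i w^T x_i)^2
//
// The loss is differentiable but not twice differentiable, so grad()
// and Hv() work on the index set I = {i : y_i w^T x_i < 1} with a
// generalized Hessian, Hv(s) = s + 2 X_I^T (C_I X_I s).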
class l2r_l2_svc_fun: public function
{
public:
	l2r_l2_svc_fun(const problem *prob, double *C);
	~l2r_l2_svc_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);
	void get_diag_preconditioner(double *M);

protected:
	void Xv(double *v, double *Xv);
	void subXTv(double *v, double *XTv);

	double *C;
	double *z;
	int *I;
	int sizeI;
	const problem *prob;
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
{
	int l=prob->l;

	this->prob = prob;

	z = new double[l];
	I = new int[l];
	this->C = C;
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
	delete[] z;
	delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)
			f += C[i]*d*d;
	}

	return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x = prob->x;

	for (i=0; i<w_size; i++)
		M[i] = 1;

	for (i=0; i<sizeI; i++)
	{
		int idx = I[i];
		feature_node *s = x[idx];
		while (s->index!=-1)
		{
			M[s->index-1] += s->value*s->value*C[idx]*2;
			s++;
		}
	}
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		Hs[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node * const xi=x[I[i]];
		double xTs = sparse_operator::dot(s, xi);

		xTs = C[I[i]]*xTs;

		sparse_operator::axpy(xTs, xi, Hs);
	}
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
}

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
		Xv[i]=sparse_operator::dot(v, x[i]);
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
		sparse_operator::axpy(v[i], x[I[i]], XTv);
}

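// L2-regularized L2-loss SVR in the primal, with epsilon-insensitive
// squared loss (p plays the role of epsilon):
//
//	f(w) = 0.5 w^T w + \sum_i C_i max(0, |w^T x_i - y_i| - p)^2
//
// Only fun() and grad() differ from l2r_l2_svc_fun; the generalized
// Hessian has the same form, so Hv() is inherited unchanged.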
class l2r_l2_svr_fun: public l2r_l2_svc_fun
{
public:
	l2r_l2_svr_fun(const problem *prob, double *C, double p);

	double fun(double *w);
	void grad(double *w, double *g);

private:
	double p;
};

l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
	l2r_l2_svc_fun(prob, C)
{
	this->p = p;
}

double l2r_l2_svr_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();
	double d;

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2;
	for(i=0;i<l;i++)
	{
		d = z[i] - y[i];
		if(d < -p)
			f += C[i]*(d+p)*(d+p);
		else if(d > p)
			f += C[i]*(d-p)*(d-p);
	}

	return(f);
}

void l2r_l2_svr_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();
	double d;

	sizeI = 0;
	for(i=0;i<l;i++)
	{
		d = z[i] - y[i];

		// generate index set I
		if(d < -p)
		{
			z[sizeI] = C[i]*(d+p);
			I[sizeI] = i;
			sizeI++;
		}
		else if(d > p)
		{
			z[sizeI] = C[i]*(d-p);
			I[sizeI] = i;
			sizeI++;
		}

	}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
// min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
// s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
// where e^m_i = 0 if y_i = m,
//       e^m_i = 1 if y_i != m,
//       C^m_i = C if m = y_i,
//       C^m_i = 0 if m != y_i,
// and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)

#define GETI(i) ((int) prob->y[i])
// To support instance weights, define GETI(i) as (i) instead

class Solver_MCSVM_CS
{
public:
	Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
	~Solver_MCSVM_CS();
	void Solve(double *w);
private:
	void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
	bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
	double *B, *C, *G;
	int w_size, l;
	int nr_class;
	int max_iter;
	double eps;
	const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
	this->w_size = prob->n;
	this->l = prob->l;
	this->nr_class = nr_class;
	this->eps = eps;
	this->max_iter = max_iter;
	this->prob = prob;
	this->B = new double[nr_class];
	this->G = new double[nr_class];
	this->C = weighted_C;
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	delete[] B;
	delete[] G;
}

int compare_double(const void *a, const void *b)
{
	if(*(double *)a > *(double *)b)
		return -1;
	if(*(double *)a < *(double *)b)
		return 1;
	return 0;
}

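// For one instance i with A_i = x_i^T x_i, solve_sub_problem minimizes
// over its active dual variables
//
//	min_alpha 0.5 A_i \sum_m alpha_m^2 + \sum_m B_m alpha_m
//	s.t. \sum_m alpha_m = 0, alpha_m <= C_yi if m == yi, else alpha_m <= 0
//
// The solution has the form alpha_m = min(bound_m, (beta - B_m)/A_i),
// where the threshold beta is found in closed form by sorting D (a
// copy of B shifted at yi) and averaging its leading entries, as
// derived in the appendix of the LIBLINEAR paper.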
void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

	clone(D, B, active_i);
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];
	beta /= r;

	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
	double bound = 0;
	if(m == yi)
		bound = C[GETI(i)];
	if(alpha_i == bound && G[m] < minG)
		return true;
	return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha = new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;

	// Initial alpha can be set here. Note that
	// sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
	// alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
	// alpha[i*nr_class+m] <= 0 if prob->y[i] != m
	// If initial alpha isn't zero, uncomment the for loop below to initialize w
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;

	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			double val = xi->value;
			QD[i] += val*val;

			// Uncomment the for loop if initial alpha isn't zero
			// for(m=0; m<nr_class; m++)
			//	w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = (int)prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index!= -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
									alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+(int)prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
// s.t. 0 <= \alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// D is a diagonal matrix
//
// In L1-SVM case:
//	upper_bound_i = Cp if y_i = 1
//	upper_bound_i = Cn if y_i = -1
//	D_ii = 0
// In L2-SVM case:
//	upper_bound_i = INF
//	D_ii = 1/(2*Cp) if y_i = 1
//	D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, define GETI(i) as (i) instead

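// The projected gradient used below for both shrinking and stopping is
//
//	PG_i = min(G_i, 0)  if alpha_i == 0
//	PG_i = max(G_i, 0)  if alpha_i == upper_bound
//	PG_i = G_i          otherwise
//
// and the outer loop stops once max_i PG_i - min_i PG_i <= eps with
// all variables active.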
static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 <= alpha[i] <= upper_bound[GETI(i)]
	for(i=0; i<l; i++)
		alpha[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		QD[i] = diag[GETI(i)];

		feature_node * const xi = prob->x[i];
		QD[i] += sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(y[i]*alpha[i], xi, w);

		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			const schar yi = y[i];
			feature_node * const xi = prob->x[i];

			G = yi*sparse_operator::dot(w, xi)-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				sparse_operator::axpy(d, xi, w);
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}


// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
// min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l |\beta_i| + \sum_{i=1}^l y_i \beta_i,
// s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
//
// where Qij = xi^T xj and
// diag(lambda) is a diagonal matrix
//
// In L1-SVM case:
//	upper_bound_i = C
//	lambda_i = 0
// In L2-SVM case:
//	upper_bound_i = INF
//	lambda_i = 1/(2*C)
//
// Given:
// x, y, p, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

#undef GETI
#define GETI(i) (0)
// To support instance weights, define GETI(i) as (i) instead

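// Unlike the classification solver above, this solver measures
// progress by the 1-norm of the minimum-norm subgradient (Gnorm1_new,
// accumulated from the per-variable violations) and stops when it
// drops below eps times its value at the first iteration, i.e. eps
// acts as a relative tolerance here.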
static void solve_l2r_l1l2_svr(
	const problem *prob, double *w, const parameter *param,
	int solver_type)
{
	int l = prob->l;
	double C = param->C;
	double p = param->p;
	int w_size = prob->n;
	double eps = param->eps;
	int i, s, iter = 0;
	int max_iter = 1000;
	int active_size = l;
	int *index = new int[l];

	double d, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	double *beta = new double[l];
	double *QD = new double[l];
	double *y = prob->y;

	// L2R_L2LOSS_SVR_DUAL
	double lambda[1], upper_bound[1];
	lambda[0] = 0.5/C;
	upper_bound[0] = INF;

	if(solver_type == L2R_L1LOSS_SVR_DUAL)
	{
		lambda[0] = 0;
		upper_bound[0] = C;
	}

	// Initial beta can be set here. Note that
	// -upper_bound <= beta[i] <= upper_bound
	for(i=0; i<l; i++)
		beta[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		QD[i] = sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(beta[i], xi, w);

		index[i] = i;
	}


	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			i = index[s];
			G = -y[i] + lambda[GETI(i)]*beta[i];
			H = QD[i] + lambda[GETI(i)];

			feature_node * const xi = prob->x[i];
			G += sparse_operator::dot(w, xi);

			double Gp = G+p;
			double Gn = G-p;
			double violation = 0;
			if(beta[i] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old && Gn<-Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] >= upper_bound[GETI(i)])
			{
				if(Gp > 0)
					violation = Gp;
				else if(Gp < -Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] <= -upper_bound[GETI(i)])
			{
				if(Gn < 0)
					violation = -Gn;
				else if(Gn > Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp < H*beta[i])
				d = -Gp/H;
			else if(Gn > H*beta[i])
				d = -Gn/H;
			else
				d = -beta[i];

			if(fabs(d) < 1.0e-12)
				continue;

			double beta_old = beta[i];
			beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
			d = beta[i]-beta_old;

			if(d != 0)
				sparse_operator::axpy(d, xi, w);
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0; i<l; i++)
	{
		v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
		if(beta[i] != 0)
			nSV++;
	}

	info("Objective value = %lf\n", v);
	info("nSV = %d\n",nSV);

	delete [] beta;
	delete [] QD;
	delete [] index;
}


// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
// min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
// s.t. 0 <= \alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// upper_bound_i = Cp if y_i = 1
// upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, define GETI(i) as (i) instead

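// alpha is stored in pairs: alpha[2*i] is the dual variable of
// instance i and alpha[2*i+1] = upper_bound - alpha[2*i], so both log
// terms of the objective are available directly. Each coordinate step
// minimizes the one-variable function
//
//	g(z) = z log(z) + (C-z) log(C-z)
//	       + 0.5 a (z - alpha_old)^2 + sign * b (z - alpha_old)
//
// with a modified Newton method: steps that would leave z <= 0 fall
// back to shrinking z by the factor eta, and the starting point is
// pulled away from the boundary z = C, where log(z/(C-z)) blows up.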
void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double *xTx = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[2*l]; // store alpha and C - alpha
	schar *y = new schar[l];
	int max_inner_iter = 100; // for inner Newton
	double innereps = 1e-2;
	double innereps_min = min(1e-8, eps);
	double upper_bound[3] = {Cn, 0, Cp};

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 < alpha[i] < upper_bound[GETI(i)]
	// alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
	for(i=0; i<l; i++)
	{
		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		xTx[i] = sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
		index[i] = i;
	}

	while (iter < max_iter)
	{
		for (i=0; i<l; i++)
		{
			int j = i+rand()%(l-i);
			swap(index[i], index[j]);
		}
		int newton_iter = 0;
		double Gmax = 0;
		for (s=0; s<l; s++)
		{
			i = index[s];
			const schar yi = y[i];
			double C = upper_bound[GETI(i)];
			double ywTx = 0, xisq = xTx[i];
			feature_node * const xi = prob->x[i];
			ywTx = yi*sparse_operator::dot(w, xi);
			double a = xisq, b = ywTx;

			// Decide to minimize g_1(z) or g_2(z)
			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
			{
				ind1 = 2*i+1;
				ind2 = 2*i;
				sign = -1;
			}

			// g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
			double alpha_old = alpha[ind1];
			double z = alpha_old;
			if(C - z < 0.5 * C)
				z = 0.1*z;
			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
			Gmax = max(Gmax, fabs(gp));

			// Newton method on the sub-problem
			const double eta = 0.1; // xi in the paper
			int inner_iter = 0;
			while (inner_iter <= max_inner_iter)
			{
				if(fabs(gp) < innereps)
					break;
				double gpp = a + C/(C-z)/z;
				double tmpz = z - gp/gpp;
				if(tmpz <= 0)
					z *= eta;
				else // tmpz in (0, C)
					z = tmpz;
				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
				newton_iter++;
				inner_iter++;
			}

			if(inner_iter > 0) // update w
			{
				alpha[ind1] = z;
				alpha[ind2] = C-z;
				sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax < eps)
			break;

		if(newton_iter <= l/10)
			innereps = max(innereps_min, 0.1*innereps);

	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i] * w[i];
	v *= 0.5;
	for(i=0; i<l; i++)
		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
	info("Objective value = %lf\n", v);

	delete [] xTx;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
// min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, define GETI(i) as (i) instead

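// Each coordinate step minimizes the one-variable piecewise quadratic
// model of the objective in direction d,
//
//	min_d |w_j + d| + G d + 0.5 H d^2
//
// whose closed-form (soft-threshold) solution is
//	d = -(G+1)/H  if G+1 < H w_j
//	d = -(G-1)/H  if G-1 > H w_j
//	d = -w_j      otherwise,
// followed by a backtracking line search on the true objective; a
// cheaper sufficient condition (appxcond) is tried first so the loss
// usually need not be recomputed.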
static void solve_l1r_l2_svc(
	problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	double d_old, d_diff;
	double loss_old, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	// Initial w can be set here.
	for(j=0; j<w_size; j++)
		w[j] = 0;

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x->value *= y[ind]; // x->value stores yi*xij
			double val = x->value;
			b[ind] -= w[j]*val;
			xj_sq[j] += C[GETI(ind)]*val*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				if(b[ind] > 0)
				{
					double val = x->value;
					double tmp = C[GETI(ind)]*val;
					G_loss -= tmp*b[ind];
					H += tmp*val;
				}
				x++;
			}
			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp < H*w[j])
				d = -Gp/H;
			else if(Gn > H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					x = prob_col->x[j];
					sparse_operator::axpy(d_diff, x, b);
					break;
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						if(b[ind] > 0)
							loss_old += C[GETI(ind)]*b[ind]*b[ind];
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}
				else
				{
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					sparse_operator::axpy(-w[i], x, b);
				}
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, define GETI(i) as (i) instead

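// This is a two-level method in the style of newGLMNET: each outer
// Newton iteration builds a quadratic model of the logistic loss
// around w (Hdiag, Grad, D) and approximately minimizes
// model + ||.||_1 over wpd by cyclic coordinate descent with its own
// inner-level shrinking; a backtracking line search on the true
// objective then moves w toward wpd.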
static void solve_l1r_lr(
	const problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, newton_iter=0, iter=0;
	int max_newton_iter = 100;
	int max_iter = 1000;
	int max_num_linesearch = 20;
	int active_size;
	int QP_active_size;

	double nu = 1e-12;
	double inner_eps = 1;
	double sigma = 0.01;
	double w_norm, w_norm_new;
	double z, G, H;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double QP_Gmax_old = INF;
	double QP_Gmax_new, QP_Gnorm1_new;
	double delta, negsum_xTd, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *Hdiag = new double[w_size];
	double *Grad = new double[w_size];
	double *wpd = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xTd = new double[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *tau = new double[l];
	double *D = new double[l];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	// Initial w can be set here.
	for(j=0; j<w_size; j++)
		w[j] = 0;

	for(j=0; j<l; j++)
	{
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;

		exp_wTx[j] = 0;
	}

	w_norm = 0;
	for(j=0; j<w_size; j++)
	{
		w_norm += fabs(w[j]);
		wpd[j] = w[j];
		index[j] = j;
		xjneg_sum[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			exp_wTx[ind] += w[j]*val;
			if(y[ind] == -1)
				xjneg_sum[j] += C[GETI(ind)]*val;
			x++;
		}
	}
	for(j=0; j<l; j++)
	{
		exp_wTx[j] = exp(exp_wTx[j]);
		double tau_tmp = 1/(1+exp_wTx[j]);
		tau[j] = C[GETI(j)]*tau_tmp;
		D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
	}

	while(newton_iter < max_newton_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;
		active_size = w_size;

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			Hdiag[j] = nu;
			Grad[j] = 0;

			double tmp = 0;
			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				Hdiag[j] += x->value*x->value*D[ind];
				tmp += x->value*tau[ind];
				x++;
			}
			Grad[j] = -tmp + xjneg_sum[j];

			double Gp = Grad[j]+1;
			double Gn = Grad[j]-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				//outer-level shrinking
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
		}

		if(newton_iter == 0)
			Gnorm1_init = Gnorm1_new;

		if(Gnorm1_new <= eps*Gnorm1_init)
			break;

		iter = 0;
		QP_Gmax_old = INF;
		QP_active_size = active_size;

		for(int i=0; i<l; i++)
			xTd[i] = 0;

		// optimize QP over wpd
		while(iter < max_iter)
		{
			QP_Gmax_new = 0;
			QP_Gnorm1_new = 0;

			for(j=0; j<QP_active_size; j++)
			{
				int i = j+rand()%(QP_active_size-j);
				swap(index[i], index[j]);
			}

			for(s=0; s<QP_active_size; s++)
			{
				j = index[s];
				H = Hdiag[j];

				x = prob_col->x[j];
				G = Grad[j] + (wpd[j]-w[j])*nu;
				while(x->index != -1)
				{
					int ind = x->index-1;
					G += x->value*D[ind]*xTd[ind];
					x++;
				}

				double Gp = G+1;
				double Gn = G-1;
				double violation = 0;
				if(wpd[j] == 0)
				{
					if(Gp < 0)
						violation = -Gp;
					else if(Gn > 0)
						violation = Gn;
					//inner-level shrinking
					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
					{
						QP_active_size--;
						swap(index[s], index[QP_active_size]);
						s--;
						continue;
					}
				}
				else if(wpd[j] > 0)
					violation = fabs(Gp);
				else
					violation = fabs(Gn);

				QP_Gmax_new = max(QP_Gmax_new, violation);
				QP_Gnorm1_new += violation;

				// obtain solution of one-variable problem
				if(Gp < H*wpd[j])
					z = -Gp/H;
				else if(Gn > H*wpd[j])
					z = -Gn/H;
				else
					z = -wpd[j];

				if(fabs(z) < 1.0e-12)
					continue;
				z = min(max(z,-10.0),10.0);

				wpd[j] += z;

				x = prob_col->x[j];
				sparse_operator::axpy(z, x, xTd);
			}

			iter++;

			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
			{
				//inner stopping
				if(QP_active_size == active_size)
					break;
				//active set reactivation
				else
				{
					QP_active_size = active_size;
					QP_Gmax_old = INF;
					continue;
				}
			}

			QP_Gmax_old = QP_Gmax_new;
		}

		if(iter >= max_iter)
			info("WARNING: reaching max number of inner iterations\n");

		delta = 0;
		w_norm_new = 0;
		for(j=0; j<w_size; j++)
		{
			delta += Grad[j]*(wpd[j]-w[j]);
			if(wpd[j] != 0)
				w_norm_new += fabs(wpd[j]);
		}
		delta += (w_norm_new-w_norm);

		negsum_xTd = 0;
		for(int i=0; i<l; i++)
			if(y[i] == -1)
				negsum_xTd += C[GETI(i)]*xTd[i];

		int num_linesearch;
		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
		{
			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;

			for(int i=0; i<l; i++)
			{
				double exp_xTd = exp(xTd[i]);
				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
			}

			if(cond <= 0)
			{
				w_norm = w_norm_new;
				for(j=0; j<w_size; j++)
					w[j] = wpd[j];
				for(int i=0; i<l; i++)
				{
					exp_wTx[i] = exp_wTx_new[i];
					double tau_tmp = 1/(1+exp_wTx[i]);
					tau[i] = C[GETI(i)]*tau_tmp;
					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
				}
				break;
			}
			else
			{
				w_norm_new = 0;
				for(j=0; j<w_size; j++)
				{
					wpd[j] = (w[j]+wpd[j])*0.5;
					if(wpd[j] != 0)
						w_norm_new += fabs(wpd[j]);
				}
				delta *= 0.5;
				negsum_xTd *= 0.5;
				for(int i=0; i<l; i++)
					xTd[i] *= 0.5;
			}
		}

		// Recompute some info due to too many line search steps
		if(num_linesearch >= max_num_linesearch)
		{
			for(int i=0; i<l; i++)
				exp_wTx[i] = 0;

			for(int i=0; i<w_size; i++)
			{
				if(w[i]==0) continue;
				x = prob_col->x[i];
				sparse_operator::axpy(w[i], x, exp_wTx);
			}

			for(int i=0; i<l; i++)
				exp_wTx[i] = exp(exp_wTx[i]);
		}

		if(iter == 1)
			inner_eps *= 0.25;

		newton_iter++;
		Gmax_old = Gmax_new;

		info("iter %3d #CD cycles %d\n", newton_iter, iter);
	}

	info("=========================\n");
	info("optimization finished, #iter = %d\n", newton_iter);
	if(newton_iter >= max_newton_iter)
		info("WARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] Hdiag;
	delete [] Grad;
	delete [] wpd;
	delete [] xjneg_sum;
	delete [] xTd;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] tau;
	delete [] D;
}

// transpose matrix X from row format to column format
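// (col_ptr is built in two passes: per-column counts first, then a
// prefix sum with one extra slot per column for the index = -1
// terminator, which is why x_space holds nnz+n nodes)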
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	size_t nnz = 0;
	size_t *col_ptr = new size_t [n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new double[l];
	prob_col->x = new feature_node*[n];

	for(i=0; i<l; i++)
		prob_col->y[i] = prob->y[i];

	for(i=0; i<n+1; i++)
		col_ptr[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			nnz++;
			col_ptr[x->index]++;
			x++;
		}
	}
	for(i=1; i<n+1; i++)
		col_ptr[i] += col_ptr[i-1] + 1;

	x_space = new feature_node[nnz+n];
	for(i=0; i<n; i++)
		prob_col->x[i] = &x_space[col_ptr[i]];

	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x_space[col_ptr[ind]].index = i+1; // starts from 1
			x_space[col_ptr[ind]].value = x->value;
			col_ptr[ind]++;
			x++;
		}
	}
	for(i=0; i<n; i++)
		x_space[col_ptr[i]].index = -1;

	*x_space_ret = x_space;

	delete [] col_ptr;
}

// label: label name, start: beginning index of each class, count: #data of each class, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = (int)prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	//
	// Labels are ordered by their first occurrence in the training set.
	// However, for two-class sets with -1/+1 labels, if -1 appears first
	// we swap the labels so that internally the binary SVM treats the
	// +1 instances as the positive class.
	//
	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
	{
		swap(label[0],label[1]);
		swap(count[0],count[1]);
		for(i=0;i<l;i++)
		{
			if(data_label[i] == 0)
				data_label[i] = 1;
			else
				data_label[i] = 0;
		}
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}

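// Note that the tolerance handed to the primal (TRON-based) solvers is
// rescaled by the class balance, primal_solver_tol =
// eps*max(min(#pos,#neg),1)/l, while the dual solvers receive
// param->eps unchanged.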
static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	//inner and outer tolerances for TRON
	double eps = param->eps;
	double eps_cg = 0.1;
	if(param->init_sol != NULL)
		eps_cg = 0.5;

	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob->l;i++)
		if(prob->y[i] > 0)
			pos++;
	neg = prob->l - pos;
	double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;

	function *fun_obj=NULL;
	switch(param->solver_type)
	{
		case L2R_LR:
		{
			double *C = new double[prob->l];
			for(int i = 0; i < prob->l; i++)
			{
				if(prob->y[i] > 0)
					C[i] = Cp;
				else
					C[i] = Cn;
			}
			fun_obj=new l2r_lr_fun(prob, C);
			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			delete[] C;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			double *C = new double[prob->l];
			for(int i = 0; i < prob->l; i++)
			{
				if(prob->y[i] > 0)
					C[i] = Cp;
				else
					C[i] = Cn;
			}
			fun_obj=new l2r_l2_svc_fun(prob, C);
			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			delete[] C;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L2R_LR_DUAL:
			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
			break;
		case L2R_L2LOSS_SVR:
		{
			double *C = new double[prob->l];
			for(int i = 0; i < prob->l; i++)
				C[i] = param->C;

			fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
			TRON tron_obj(fun_obj, param->eps);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			delete[] C;
			break;

		}
		case L2R_L1LOSS_SVR_DUAL:
			solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
			break;
		case L2R_L2LOSS_SVR_DUAL:
			solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
			break;
		default:
			fprintf(stderr, "ERROR: unknown solver_type\n");
			break;
	}
}

// Calculate the initial C for parameter selection
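// (the idea, sketched: min_C is a lower bound below which the optimal
// w of L2R_LR / L2R_L2LOSS_SVC is expected to stay essentially at
// zero, so a search over C can safely start from the nearest power of
// two below it; see the LIBLINEAR parameter-selection paper for the
// derivation)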
static double calc_start_C(const problem *prob, const parameter *param)
{
	int i;
	double xTx,max_xTx;
	max_xTx = 0;
	for(i=0; i<prob->l; i++)
	{
		xTx = 0;
		feature_node *xi=prob->x[i];
		while(xi->index != -1)
		{
			double val = xi->value;
			xTx += val*val;
			xi++;
		}
		if(xTx > max_xTx)
			max_xTx = xTx;
	}

	double min_C = 1.0;
	if(param->solver_type == L2R_LR)
		min_C = 1.0 / (prob->l * max_xTx);
	else if(param->solver_type == L2R_L2LOSS_SVC)
		min_C = 1.0 / (2 * prob->l * max_xTx);

	return pow( 2, floor(log(min_C) / log(2.0)) );
}


//
// Interface functions
//
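// A minimal usage sketch of the training interface (illustrative only;
// it assumes the declarations in linear.h and uses hypothetical data):
// two 2-feature instances in the sparse format, trained with
// L2-regularized logistic regression.
//
//	struct feature_node x1[] = {{1,1.0},{2,0.5},{-1,0}};
//	struct feature_node x2[] = {{1,-1.0},{2,0.5},{-1,0}};
//	struct feature_node *rows[] = {x1, x2};
//	double labels[] = {+1, -1};
//	struct problem prob = {2, 2, labels, rows, -1}; // l, n, y, x, bias
//	struct parameter param;
//	param.solver_type = L2R_LR;
//	param.eps = 0.01; param.C = 1; param.p = 0.1;
//	param.nr_weight = 0; param.weight_label = NULL;
//	param.weight = NULL; param.init_sol = NULL;
//	struct model *m = train(&prob, &param);
//	double pred = predict(m, x1);
//	free_and_destroy_model(&m);
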
train(const problem * prob,const parameter * param)2336 model* train(const problem *prob, const parameter *param)
2337 {
2338 int i,j;
2339 int l = prob->l;
2340 int n = prob->n;
2341 int w_size = prob->n;
2342 model *model_ = Malloc(model,1);
2343
2344 if(prob->bias>=0)
2345 model_->nr_feature=n-1;
2346 else
2347 model_->nr_feature=n;
2348 model_->param = *param;
2349 model_->bias = prob->bias;
2350
2351 if(check_regression_model(model_))
2352 {
2353 model_->w = Malloc(double, w_size);
2354 for(i=0; i<w_size; i++)
2355 model_->w[i] = 0;
2356 model_->nr_class = 2;
2357 model_->label = NULL;
2358 train_one(prob, param, model_->w, 0, 0);
2359 }
2360 else
2361 {
2362 int nr_class;
2363 int *label = NULL;
2364 int *start = NULL;
2365 int *count = NULL;
2366 int *perm = Malloc(int,l);
2367
2368 // group training data of the same class
2369 group_classes(prob,&nr_class,&label,&start,&count,perm);
2370
2371 model_->nr_class=nr_class;
2372 model_->label = Malloc(int,nr_class);
2373 for(i=0;i<nr_class;i++)
2374 model_->label[i] = label[i];
2375
2376 // calculate weighted C
2377 double *weighted_C = Malloc(double, nr_class);
2378 for(i=0;i<nr_class;i++)
2379 weighted_C[i] = param->C;
2380 for(i=0;i<param->nr_weight;i++)
2381 {
2382 for(j=0;j<nr_class;j++)
2383 if(param->weight_label[i] == label[j])
2384 break;
2385 if(j == nr_class)
2386 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2387 else
2388 weighted_C[j] *= param->weight[i];
2389 }
2390
2391 // constructing the subproblem
2392 feature_node **x = Malloc(feature_node *,l);
2393 for(i=0;i<l;i++)
2394 x[i] = prob->x[perm[i]];
2395
2396 int k;
2397 problem sub_prob;
2398 sub_prob.l = l;
2399 sub_prob.n = n;
2400 sub_prob.x = Malloc(feature_node *,sub_prob.l);
2401 sub_prob.y = Malloc(double,sub_prob.l);
2402
2403 for(k=0; k<sub_prob.l; k++)
2404 sub_prob.x[k] = x[k];
2405
2406 // multi-class svm by Crammer and Singer
2407 if(param->solver_type == MCSVM_CS)
2408 {
2409 model_->w=Malloc(double, n*nr_class);
2410 for(i=0;i<nr_class;i++)
2411 for(j=start[i];j<start[i]+count[i];j++)
2412 sub_prob.y[j] = i;
2413 Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2414 Solver.Solve(model_->w);
2415 }
2416 else
2417 {
2418 if(nr_class == 2)
2419 {
2420 model_->w=Malloc(double, w_size);
2421
2422 int e0 = start[0]+count[0];
2423 k=0;
2424 for(; k<e0; k++)
2425 sub_prob.y[k] = +1;
2426 for(; k<sub_prob.l; k++)
2427 sub_prob.y[k] = -1;
2428
2429 if(param->init_sol != NULL)
2430 for(i=0;i<w_size;i++)
2431 model_->w[i] = param->init_sol[i];
2432 else
2433 for(i=0;i<w_size;i++)
2434 model_->w[i] = 0;
2435
2436 train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
2437 }
2438 else
2439 {
2440 model_->w=Malloc(double, w_size*nr_class);
2441 double *w=Malloc(double, w_size);
2442 for(i=0;i<nr_class;i++)
2443 {
2444 int si = start[i];
2445 int ei = si+count[i];
2446
2447 k=0;
2448 for(; k<si; k++)
2449 sub_prob.y[k] = -1;
2450 for(; k<ei; k++)
2451 sub_prob.y[k] = +1;
2452 for(; k<sub_prob.l; k++)
2453 sub_prob.y[k] = -1;
2454
2455 if(param->init_sol != NULL)
2456 for(j=0;j<w_size;j++)
2457 w[j] = param->init_sol[j*nr_class+i];
2458 else
2459 for(j=0;j<w_size;j++)
2460 w[j] = 0;
2461
2462 train_one(&sub_prob, param, w, weighted_C[i], param->C);
2463
2464 for(j=0;j<w_size;j++)
2465 model_->w[j*nr_class+i] = w[j];
2466 }
2467 free(w);
2468 }
2469
2470 }
2471
2472 free(x);
2473 free(label);
2474 free(start);
2475 free(count);
2476 free(perm);
2477 free(sub_prob.x);
2478 free(sub_prob.y);
2479 free(weighted_C);
2480 }
2481 return model_;
2482 }
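
/*
 Minimal usage sketch (illustrative only, not part of the library): train
 a binary L2R_LR model on two one-feature instances and predict one of
 them. Feature indices are 1-based and every instance ends with index -1.

	struct feature_node x1[] = {{1, 1.0}, {-1, 0}};
	struct feature_node x2[] = {{1, -1.0}, {-1, 0}};
	struct feature_node *rows[] = {x1, x2};
	double y[] = {+1, -1};
	struct problem prob_ex = {2, 1, y, rows, -1};	// l, n, y, x, bias

	struct parameter param_ex;
	memset(&param_ex, 0, sizeof(param_ex));
	param_ex.solver_type = L2R_LR;
	param_ex.C = 1;
	param_ex.eps = 0.01;

	if(check_parameter(&prob_ex, &param_ex) == NULL)
	{
		struct model *m = train(&prob_ex, &param_ex);
		double pred = predict(m, x1);	// expected: +1
		free_and_destroy_model(&m);
	}
*/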

void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
{
	int i;
	int *fold_start;
	int l = prob->l;
	int *perm = Malloc(int,l);
	if (nr_fold > l)
	{
		nr_fold = l;
		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
	}
	fold_start = Malloc(int,nr_fold+1);
	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct problem subprob;

		subprob.bias = prob->bias;
		subprob.n = prob->n;
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct feature_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct model *submodel = train(&subprob,param);
		for(j=begin;j<end;j++)
			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}
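
/*
 Usage sketch (illustrative): run 5-fold CV and compute the accuracy.
 target must hold prob->l doubles; entry i is the cross-validation
 prediction for instance i in the original order.

	double *target = Malloc(double, prob->l);
	cross_validation(prob, param, 5, target);
	int correct = 0;
	for(int i = 0; i < prob->l; i++)
		if(target[i] == prob->y[i])
			++correct;
	printf("CV accuracy = %g%%\n", 100.0*correct/prob->l);
	free(target);
*/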

void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
{
	// variables for CV
	int i;
	int *fold_start;
	int l = prob->l;
	int *perm = Malloc(int, l);
	double *target = Malloc(double, prob->l);
	struct problem *subprob = Malloc(problem,nr_fold);

	// variables for warm start
	double ratio = 2;
	double **prev_w = Malloc(double*, nr_fold);
	for(i = 0; i < nr_fold; i++)
		prev_w[i] = NULL;
	int num_unchanged_w = 0;
	struct parameter param1 = *param;
	void (*default_print_string) (const char *) = liblinear_print_string;

	if (nr_fold > l)
	{
		nr_fold = l;
		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
	}
	fold_start = Malloc(int,nr_fold+1);
	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;

		subprob[i].bias = prob->bias;
		subprob[i].n = prob->n;
		subprob[i].l = l-(end-begin);
		subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
		subprob[i].y = Malloc(double,subprob[i].l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob[i].x[k] = prob->x[perm[j]];
			subprob[i].y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob[i].x[k] = prob->x[perm[j]];
			subprob[i].y[k] = prob->y[perm[j]];
			++k;
		}
	}

	*best_rate = 0;
	if(start_C <= 0)
		start_C = calc_start_C(prob,param);
	param1.C = start_C;

	while(param1.C <= max_C)
	{
		// output disabled for running CV at a particular C
		set_print_string_function(&print_null);

		for(i=0; i<nr_fold; i++)
		{
			int j;
			int begin = fold_start[i];
			int end = fold_start[i+1];

			param1.init_sol = prev_w[i];
			struct model *submodel = train(&subprob[i],&param1);

			int total_w_size;
			if(submodel->nr_class == 2)
				total_w_size = subprob[i].n;
			else
				total_w_size = subprob[i].n * submodel->nr_class;

			if(prev_w[i] == NULL)
			{
				prev_w[i] = Malloc(double, total_w_size);
				for(j=0; j<total_w_size; j++)
					prev_w[i][j] = submodel->w[j];
			}
			else if(num_unchanged_w >= 0)
			{
				double norm_w_diff = 0;
				for(j=0; j<total_w_size; j++)
				{
					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
					prev_w[i][j] = submodel->w[j];
				}
				norm_w_diff = sqrt(norm_w_diff);

				if(norm_w_diff > 1e-15)
					num_unchanged_w = -1;
			}
			else
			{
				for(j=0; j<total_w_size; j++)
					prev_w[i][j] = submodel->w[j];
			}

			for(j=begin; j<end; j++)
				target[perm[j]] = predict(submodel,prob->x[perm[j]]);

			free_and_destroy_model(&submodel);
		}
		set_print_string_function(default_print_string);

		int total_correct = 0;
		for(i=0; i<prob->l; i++)
			if(target[i] == prob->y[i])
				++total_correct;
		double current_rate = (double)total_correct/prob->l;
		if(current_rate > *best_rate)
		{
			*best_C = param1.C;
			*best_rate = current_rate;
		}

		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
		num_unchanged_w++;
		if(num_unchanged_w == 3)
			break;
		param1.C = param1.C*ratio;
	}

	if(param1.C > max_C && max_C > start_C)
		info("warning: maximum C reached.\n");
	free(fold_start);
	free(perm);
	free(target);
	for(i=0; i<nr_fold; i++)
	{
		free(subprob[i].x);
		free(subprob[i].y);
		free(prev_w[i]);
	}
	free(prev_w);
	free(subprob);
}
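
/*
 Usage sketch (illustrative): search for the best C with 5-fold CV,
 starting from the automatically computed C (any start_C <= 0) and
 stopping at max_C = 1024. The loop above doubles C each step and exits
 early once the warm-started solutions stop changing for three
 consecutive C values.

	double best_C, best_rate;
	find_parameter_C(prob, param, 5, -1, 1024, &best_C, &best_rate);
	printf("best C = %g, CV accuracy = %g%%\n", best_C, 100.0*best_rate);
*/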

double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
	int idx;
	int n;
	if(model_->bias>=0)
		n=model_->nr_feature+1;
	else
		n=model_->nr_feature;
	double *w=model_->w;
	int nr_class=model_->nr_class;
	int i;
	int nr_w;
	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	const feature_node *lx=x;
	for(i=0;i<nr_w;i++)
		dec_values[i] = 0;
	for(; (idx=lx->index)!=-1; lx++)
	{
		// the dimension of testing data may exceed that of training
		if(idx<=n)
			for(i=0;i<nr_w;i++)
				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
	}

	if(nr_class==2)
	{
		if(check_regression_model(model_))
			return dec_values[0];
		else
			return (dec_values[0]>0)?model_->label[0]:model_->label[1];
	}
	else
	{
		int dec_max_idx = 0;
		for(i=1;i<nr_class;i++)
		{
			if(dec_values[i] > dec_values[dec_max_idx])
				dec_max_idx = i;
		}
		return model_->label[dec_max_idx];
	}
}
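
/*
 Note (derived from the code above): for binary classification and for
 regression only dec_values[0] is written, and a positive value votes for
 model_->label[0]; for multi-class models dec_values[i] is the score of
 model_->label[i] and the highest-scoring label is returned.
*/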

double predict(const model *model_, const feature_node *x)
{
	double *dec_values = Malloc(double, model_->nr_class);
	double label=predict_values(model_, x, dec_values);
	free(dec_values);
	return label;
}

double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
	if(check_probability_model(model_))
	{
		int i;
		int nr_class=model_->nr_class;
		int nr_w;
		if(nr_class==2)
			nr_w = 1;
		else
			nr_w = nr_class;

		double label=predict_values(model_, x, prob_estimates);
		for(i=0;i<nr_w;i++)
			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

		if(nr_class==2) // for binary classification
			prob_estimates[1]=1.-prob_estimates[0];
		else
		{
			double sum=0;
			for(i=0; i<nr_class; i++)
				sum+=prob_estimates[i];

			for(i=0; i<nr_class; i++)
				prob_estimates[i]=prob_estimates[i]/sum;
		}

		return label;
	}
	else
		return 0;
}
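
/*
 Usage sketch (illustrative; m and x1 as in the sketch after train()):
 probability outputs are only defined for the logistic-regression
 solvers (see check_probability_model below); for any other solver
 predict_probability returns 0 and leaves prob_estimates untouched.

	double *probs = Malloc(double, get_nr_class(m));
	double label = predict_probability(m, x1, probs);
	// probs[i] estimates the probability of the label returned
	// by get_labels() at position i
	free(probs);
*/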

static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
	"", "", "",
	"L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
	int i;
	int nr_feature=model_->nr_feature;
	int n;
	const parameter& param = model_->param;

	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	char *old_locale = setlocale(LC_ALL, NULL);
	if (old_locale)
	{
		old_locale = strdup(old_locale);
	}
	setlocale(LC_ALL, "C");

	int nr_w;
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
	fprintf(fp, "nr_class %d\n", model_->nr_class);

	if(model_->label)
	{
		fprintf(fp, "label");
		for(i=0; i<model_->nr_class; i++)
			fprintf(fp, " %d", model_->label[i]);
		fprintf(fp, "\n");
	}

	fprintf(fp, "nr_feature %d\n", nr_feature);

	fprintf(fp, "bias %.17g\n", model_->bias);

	fprintf(fp, "w\n");
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
		fprintf(fp, "\n");
	}

	setlocale(LC_ALL, old_locale);
	free(old_locale);

	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	else return 0;
}
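
/*
 Resulting file layout (made-up numbers; the line order is fixed by the
 fprintf calls above): header lines followed by one row of w per feature,
 with nr_w values per row.

	solver_type L2R_LR
	nr_class 2
	label 1 -1
	nr_feature 3
	bias -1
	w
	0.25
	-0.5
	0.125
*/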

//
// FSCANF helps to handle fscanf failures.
// Its do-while block avoids the ambiguity that arises when
//	if (...)
//		FSCANF();
// is followed by an else.
//
#define FSCANF(_stream, _format, _var)do\
{\
	if (fscanf(_stream, _format, _var) != 1)\
	{\
		fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
		EXIT_LOAD_MODEL()\
	}\
}while(0)
// EXIT_LOAD_MODEL should NOT end with a semicolon.
#define EXIT_LOAD_MODEL()\
{\
	setlocale(LC_ALL, old_locale);\
	free(model_->label);\
	free(model_);\
	free(old_locale);\
	return NULL;\
}
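
/*
 Illustration (general C idiom, not specific to this file): without the
 do { ... } while(0) wrapper, the expansion of

	if (read_header)
		FSCANF(fp, "%80s", cmd);
	else
		...

 would end in "};", and the stray semicolon would detach the else from
 its if.
*/
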
struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;
	// parameters used only in training are not restored here, but the
	// arrays are set to NULL for safety
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.init_sol = NULL;

	model_->label = NULL;

	char *old_locale = setlocale(LC_ALL, NULL);
	if (old_locale)
	{
		old_locale = strdup(old_locale);
	}
	setlocale(LC_ALL, "C");

	char cmd[81];
	while(1)
	{
		FSCANF(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			FSCANF(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				EXIT_LOAD_MODEL()
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			FSCANF(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			FSCANF(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			FSCANF(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				FSCANF(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			EXIT_LOAD_MODEL()
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if(nr_class==2 && param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
	}

	setlocale(LC_ALL, old_locale);
	free(old_locale);

	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

	return model_;
}

int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}

// use inline here for better performance (around 20% faster than the non-inline one)
static inline double get_w_value(const struct model *model_, int idx, int label_idx)
{
	int nr_class = model_->nr_class;
	int solver_type = model_->param.solver_type;
	const double *w = model_->w;

	if(idx < 0 || idx > model_->nr_feature)
		return 0;
	if(check_regression_model(model_))
		return w[idx];
	else
	{
		if(label_idx < 0 || label_idx >= nr_class)
			return 0;
		if(nr_class == 2 && solver_type != MCSVM_CS)
		{
			if(label_idx == 0)
				return w[idx];
			else
				return -w[idx];
		}
		else
			return w[idx*nr_class+label_idx];
	}
}
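
/*
 Layout of w (as used above and in predict_values): binary models trained
 with any solver except MCSVM_CS store a single weight vector, and the
 weight for label[1] is the negation of the weight for label[0]; all
 other models store weights feature-major, i.e. w[idx*nr_class+label_idx].
*/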

// feat_idx: starting from 1 to nr_feature
// label_idx: starting from 0 to nr_class-1 for classification models;
// for regression models, label_idx is ignored.
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
{
	if(feat_idx > model_->nr_feature)
		return 0;
	return get_w_value(model_, feat_idx-1, label_idx);
}

double get_decfun_bias(const struct model *model_, int label_idx)
{
	int bias_idx = model_->nr_feature;
	double bias = model_->bias;
	if(bias <= 0)
		return 0;
	else
		return bias*get_w_value(model_, bias_idx, label_idx);
}
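
/*
 Sketch (illustrative): reconstruct the decision value for class index k
 of a classification model from the two accessors above; x is an
 index/value array terminated by index -1, as elsewhere in this file.

	static double decfun_value(const struct model *m,
	                           const struct feature_node *x, int k)
	{
		double s = get_decfun_bias(m, k);
		for(; x->index != -1; x++)
			s += get_decfun_coef(m, x->index, k)*x->value;
		return s;
	}
*/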

void free_model_content(struct model *model_ptr)
{
	if(model_ptr->w != NULL)
		free(model_ptr->w);
	if(model_ptr->label != NULL)
		free(model_ptr->label);
}

void free_and_destroy_model(struct model **model_ptr_ptr)
{
	struct model *model_ptr = *model_ptr_ptr;
	if(model_ptr != NULL)
	{
		free_model_content(model_ptr);
		free(model_ptr);
	}
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
	if(param->init_sol != NULL)
		free(param->init_sol);
}

const char *check_parameter(const problem*, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->p < 0)
		return "p < 0";

	if(param->solver_type != L2R_LR
	&& param->solver_type != L2R_L2LOSS_SVC_DUAL
	&& param->solver_type != L2R_L2LOSS_SVC
	&& param->solver_type != L2R_L1LOSS_SVC_DUAL
	&& param->solver_type != MCSVM_CS
	&& param->solver_type != L1R_L2LOSS_SVC
	&& param->solver_type != L1R_LR
	&& param->solver_type != L2R_LR_DUAL
	&& param->solver_type != L2R_L2LOSS_SVR
	&& param->solver_type != L2R_L2LOSS_SVR_DUAL
	&& param->solver_type != L2R_L1LOSS_SVR_DUAL)
		return "unknown solver type";

	if(param->init_sol != NULL
	&& param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
		return "Initial-solution specification supported only for solvers L2R_LR and L2R_L2LOSS_SVC";

	return NULL;
}
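
/*
 Usage sketch (illustrative): check_parameter returns NULL when the
 parameters are usable and a static error string otherwise; call it
 before train(), as in the sketch after train() above.

	const char *err = check_parameter(&prob_ex, &param_ex);
	if(err != NULL)
		fprintf(stderr, "ERROR: %s\n", err);
*/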

int check_probability_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_LR ||
		model_->param.solver_type==L2R_LR_DUAL ||
		model_->param.solver_type==L1R_LR);
}

int check_regression_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_L2LOSS_SVR ||
		model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
		model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
}

void set_print_string_function(void (*print_func)(const char*))
{
	if (print_func == NULL)
		liblinear_print_string = &print_string_stdout;
	else
		liblinear_print_string = print_func;
}
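
/*
 Usage sketch (illustrative): silence all library output by installing a
 no-op printer, and restore the default stdout printer by passing NULL.

	static void quiet(const char *) {}
	...
	set_print_string_function(&quiet);	// mute
	set_print_string_function(NULL);	// back to stdout
*/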