#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "linear.h"
#include "tron.h"
typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

static void print_string_stdout(const char *s)
{
	fputs(s,stdout);
	fflush(stdout);
}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsnprintf(buf,sizeof(buf),fmt,ap); // bounded write; vsprintf could overflow buf
	va_end(ap);
	(*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif

class l2r_lr_fun : public function
{
public:
	l2r_lr_fun(const problem *prob, double Cp, double Cn);
	~l2r_lr_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void XTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	const problem *prob;
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_lr_fun::~l2r_lr_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
}


double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	// z = Xw was computed by the preceding call to fun()
	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	Xv(s, wa);
	for(i=0;i<l;i++)
		wa[i] = C[i]*D[i]*wa[i];

	XTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + Hs[i];
	delete[] wa;
}

void l2r_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}

class l2r_l2_svc_fun : public function
{
public:
	l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
	~l2r_l2_svc_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void subXv(double *v, double *Xv);
	void subXTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	int *I;
	int sizeI;
	const problem *prob;
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];
	I = new int[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
	delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)
			f += C[i]*d*d;
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	subXv(s, wa);
	for(i=0;i<sizeI;i++)
		wa[i] = C[I[i]]*wa[i];

	subXTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
	delete[] wa;
}

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXv(double *v, double *Xv)
{
	int i;
	feature_node **x=prob->x;

	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
//    s.t.  \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//  where e^m_i = 0 if y_i  = m,
//        e^m_i = 1 if y_i != m,
//  C^m_i = C if m  = y_i,
//  C^m_i = 0 if m != y_i,
//  and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)

#define GETI(i) (prob->y[i])
// To support weights for instances, use GETI(i) (i)

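// A minimal standalone sketch (not part of LIBLINEAR) of the closed-form
// sub-problem solution implemented in Solver_MCSVM_CS::solve_sub_problem
// below: with D sorted in decreasing order, beta accumulates entries while
// beta < r*D[r] and is then averaged. All numbers here are assumptions for
// illustration, and the block is kept inside #if 0 so it does not affect
// compilation.
#if 0
static void mcsvm_sub_problem_sketch()
{
	double D[3] = {3.0, 2.0, -1.0}; // B values, already sorted in decreasing order
	double AiCyi = 4.0;             // assumed value of A_i*C_yi
	double beta = D[0] - AiCyi;
	int r;
	for(r=1;r<3 && beta<r*D[r];r++)
		beta += D[r];
	beta /= r;                      // here: ((3-4)+2)/2 = 0.5
	printf("beta = %g\n", beta);
}
#endif
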
class Solver_MCSVM_CS
{
	public:
		Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
		~Solver_MCSVM_CS();
		void Solve(double *w);
	private:
		void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
		bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
		double *B, *C, *G;
		int w_size, l;
		int nr_class;
		int max_iter;
		double eps;
		const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
	this->w_size = prob->n;
	this->l = prob->l;
	this->nr_class = nr_class;
	this->eps = eps;
	this->max_iter = max_iter;
	this->prob = prob;
	this->B = new double[nr_class];
	this->G = new double[nr_class];
	this->C = weighted_C;
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	delete[] B;
	delete[] G;
}

int compare_double(const void *a, const void *b)
{
	// sorts in decreasing order
	if(*(double *)a > *(double *)b)
		return -1;
	if(*(double *)a < *(double *)b)
		return 1;
	return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

	clone(D, B, active_i);
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];

	beta /= r;
	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
	double bound = 0;
	if(m == yi)
		bound = C[GETI(i)];
	if(alpha_i == bound && G[m] < minG)
		return true;
	return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha = new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;
	// initial
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;
	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index!= -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
											alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//  D_ii = 0
// In L2-SVM case:
//  upper_bound_i = INF
//  D_ii = 1/(2*Cp) if y_i = 1
//  D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

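// A tiny sketch (not part of LIBLINEAR) of the one-variable update the dual
// solver below performs whenever the projected gradient PG is nonzero: a
// Newton step alpha_i - G/Q_ii clipped back to the feasible box [0, U]. The
// helper name is hypothetical; kept inside #if 0.
#if 0
static double clipped_cd_step(double alpha, double G, double Qii, double U)
{
	double t = alpha - G/Qii; // unconstrained minimizer along this coordinate
	if(t < 0) t = 0;          // project onto the box constraint
	if(t > U) t = U;
	return t;
}
#endif
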
static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		QD[i] = diag[GETI(i)];

		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			G = 0;
			schar yi = y[i];

			feature_node *xi = prob->x[i];
			while(xi->index!= -1)
			{
				G += w[xi->index-1]*(xi->value);
				xi++;
			}
			G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log(\alpha_i) + (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

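// A minimal sketch (not part of LIBLINEAR) of the damped Newton inner loop
// used below on the sub-problem
//   g(z) = z log(z) + (C-z) log(C-z) + 0.5 a (z-alpha_old)^2 + b (z-alpha_old),
// whose derivatives, as in the solver, are
//   g'(z)  = a(z-alpha_old) + b + log(z/(C-z))
//   g''(z) = a + C/((C-z) z)
// All values and the helper name are assumptions for illustration; kept in #if 0.
#if 0
static double lr_dual_newton_sketch(double a, double b, double C, double alpha_old)
{
	double z = alpha_old;
	const double eta = 0.1;             // step-shrinking factor, as in the solver
	for(int it=0; it<100; it++)
	{
		double gp = a*(z-alpha_old) + b + log(z/(C-z));
		if(fabs(gp) < 1e-2)
			break;
		double gpp = a + C/((C-z)*z);   // g''(z)
		double tmpz = z - gp/gpp;
		z = (tmpz <= 0) ? z*eta : tmpz; // keep z inside (0, C)
	}
	return z;
}
#endif
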
void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double *xTx = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[2*l]; // store alpha and C - alpha
	schar *y = new schar[l];
	int max_inner_iter = 100; // for inner Newton
	double innereps = 1e-2;
	double innereps_min = min(1e-8, eps);
	double upper_bound[3] = {Cn, 0, Cp};

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];

		xTx[i] = 0;
		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			xTx[i] += (xi->value)*(xi->value);
			w[xi->index-1] += y[i]*alpha[2*i]*xi->value;
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		for (i=0; i<l; i++)
		{
			int j = i+rand()%(l-i);
			swap(index[i], index[j]);
		}
		int newton_iter = 0;
		double Gmax = 0;
		for (s=0; s<l; s++)
		{
			i = index[s];
			schar yi = y[i];
			double C = upper_bound[GETI(i)];
			double ywTx = 0, xisq = xTx[i];
			feature_node *xi = prob->x[i];
			while (xi->index != -1)
			{
				ywTx += w[xi->index-1]*xi->value;
				xi++;
			}
			ywTx *= y[i];
			double a = xisq, b = ywTx;

			// Decide to minimize g_1(z) or g_2(z)
			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
			{
				ind1 = 2*i+1;
				ind2 = 2*i;
				sign = -1;
			}

			// g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
			double alpha_old = alpha[ind1];
			double z = alpha_old;
			if(C - z < 0.5 * C)
				z = 0.1*z;
			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
			Gmax = max(Gmax, fabs(gp));

			// Newton method on the sub-problem
			const double eta = 0.1; // xi in the paper
			int inner_iter = 0;
			while (inner_iter <= max_inner_iter)
			{
				if(fabs(gp) < innereps)
					break;
				double gpp = a + C/(C-z)/z;
				double tmpz = z - gp/gpp;
				if(tmpz <= 0)
					z *= eta;
				else // tmpz in (0, C)
					z = tmpz;
				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
				newton_iter++;
				inner_iter++;
			}

			if(inner_iter > 0) // update w
			{
				alpha[ind1] = z;
				alpha[ind2] = C-z;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax < eps)
			break;

		if(newton_iter <= l/10)
			innereps = max(innereps_min, 0.1*innereps);

	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i] * w[i];
	v *= 0.5;
	for(i=0; i<l; i++)
		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
	info("Objective value = %lf\n", v);

	delete [] xTx;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

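// A small sketch (not part of LIBLINEAR) of the one-variable Newton direction
// with an L1 term that the solver below computes: minimizing
// |w+d| - |w| + G*d + 0.5*H*d^2 in d gives a soft-thresholded step. The
// helper name is hypothetical; kept in #if 0.
#if 0
static double l1_newton_direction(double G, double H, double w)
{
	if(G+1 <= H*w)
		return -(G+1)/H;  // updated coordinate stays positive
	if(G-1 >= H*w)
		return -(G-1)/H;  // updated coordinate stays negative
	return -w;            // otherwise it is set exactly to zero
}
#endif
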
static void solve_l1r_l2_svc(
	problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init;
	double d_old, d_diff;
	double loss_old, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			x->value *= y[ind]; // x->value stores yi*xij
			xj_sq[j] += C[GETI(ind)]*val*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				if(b[ind] > 0)
				{
					double val = x->value;
					double tmp = C[GETI(ind)]*val;
					G_loss -= tmp*b[ind];
					H += tmp*val;
				}
				x++;
			}
			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					x = prob_col->x[j];
					while(x->index != -1)
					{
						b[x->index-1] += d_diff*x->value;
						x++;
					}
					break;
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						if(b[ind] > 0)
							loss_old += C[GETI(ind)]*b[ind]*b[ind];
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}
				else
				{
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						b[x->index-1] -= w[i]*x->value;
						x++;
					}
				}
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

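// A brief sketch (not part of LIBLINEAR) of the per-instance quantities that
// the solver below maintains during its line search, written as a standalone
// helper with assumed inputs. With e = exp(w^T x_i), tau_i = C_i/(1+e) feeds
// the gradient and D_i = C_i*e/(1+e)^2 the diagonal second-order model.
// Kept in #if 0; the helper name is hypothetical.
#if 0
static void logistic_terms_sketch(double Ci, double wTx, double *tau_i, double *D_i)
{
	double e = exp(wTx); // exp(w^T x_i), cf. exp_wTx[] below
	double t = 1/(1+e);
	*tau_i = Ci*t;       // cf. tau[i] = C[GETI(i)]*tau_tmp
	*D_i   = Ci*e*t*t;   // cf. D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp
}
#endif
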
static void solve_l1r_lr(
	const problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, newton_iter=0, iter=0;
	int max_newton_iter = 100;
	int max_iter = 1000;
	int max_num_linesearch = 20;
	int active_size;
	int QP_active_size;

	double nu = 1e-12;
	double inner_eps = 1;
	double sigma = 0.01;
	double w_norm=0, w_norm_new;
	double z, G, H;
	double Gnorm1_init;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double QP_Gmax_old = INF;
	double QP_Gmax_new, QP_Gnorm1_new;
	double delta, negsum_xTd, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *Hdiag = new double[w_size];
	double *Grad = new double[w_size];
	double *wpd = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xTd = new double[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *tau = new double[l];
	double *D = new double[l];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;

		// assume initial w is 0
		exp_wTx[j] = 1;
		tau[j] = C[GETI(j)]*0.5;
		D[j] = C[GETI(j)]*0.25;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		wpd[j] = w[j];
		index[j] = j;
		xjneg_sum[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			if(y[ind] == -1)
				xjneg_sum[j] += C[GETI(ind)]*x->value;
			x++;
		}
	}

	while(newton_iter < max_newton_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;
		active_size = w_size;

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			Hdiag[j] = nu;
			Grad[j] = 0;

			double tmp = 0;
			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				Hdiag[j] += x->value*x->value*D[ind];
				tmp += x->value*tau[ind];
				x++;
			}
			Grad[j] = -tmp + xjneg_sum[j];

			double Gp = Grad[j]+1;
			double Gn = Grad[j]-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				//outer-level shrinking
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
		}

		if(newton_iter == 0)
			Gnorm1_init = Gnorm1_new;

		if(Gnorm1_new <= eps*Gnorm1_init)
			break;

		iter = 0;
		QP_Gmax_old = INF;
		QP_active_size = active_size;

		for(int i=0; i<l; i++)
			xTd[i] = 0;

		// optimize QP over wpd
		while(iter < max_iter)
		{
			QP_Gmax_new = 0;
			QP_Gnorm1_new = 0;

			for(j=0; j<QP_active_size; j++)
			{
				int i = j+rand()%(QP_active_size-j);
				swap(index[i], index[j]);
			}

			for(s=0; s<QP_active_size; s++)
			{
				j = index[s];
				H = Hdiag[j];

				x = prob_col->x[j];
				G = Grad[j] + (wpd[j]-w[j])*nu;
				while(x->index != -1)
				{
					int ind = x->index-1;
					G += x->value*D[ind]*xTd[ind];
					x++;
				}

				double Gp = G+1;
				double Gn = G-1;
				double violation = 0;
				if(wpd[j] == 0)
				{
					if(Gp < 0)
						violation = -Gp;
					else if(Gn > 0)
						violation = Gn;
					//inner-level shrinking
					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
					{
						QP_active_size--;
						swap(index[s], index[QP_active_size]);
						s--;
						continue;
					}
				}
				else if(wpd[j] > 0)
					violation = fabs(Gp);
				else
					violation = fabs(Gn);

				QP_Gmax_new = max(QP_Gmax_new, violation);
				QP_Gnorm1_new += violation;

				// obtain solution of one-variable problem
				if(Gp <= H*wpd[j])
					z = -Gp/H;
				else if(Gn >= H*wpd[j])
					z = -Gn/H;
				else
					z = -wpd[j];

				if(fabs(z) < 1.0e-12)
					continue;
				z = min(max(z,-10.0),10.0);

				wpd[j] += z;

				x = prob_col->x[j];
				while(x->index != -1)
				{
					int ind = x->index-1;
					xTd[ind] += x->value*z;
					x++;
				}
			}

			iter++;

			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
			{
				//inner stopping
				if(QP_active_size == active_size)
					break;
				//active set reactivation
				else
				{
					QP_active_size = active_size;
					QP_Gmax_old = INF;
					continue;
				}
			}

			QP_Gmax_old = QP_Gmax_new;
		}

		if(iter >= max_iter)
			info("WARNING: reaching max number of inner iterations\n");

		delta = 0;
		w_norm_new = 0;
		for(j=0; j<w_size; j++)
		{
			delta += Grad[j]*(wpd[j]-w[j]);
			if(wpd[j] != 0)
				w_norm_new += fabs(wpd[j]);
		}
		delta += (w_norm_new-w_norm);

		negsum_xTd = 0;
		for(int i=0; i<l; i++)
			if(y[i] == -1)
				negsum_xTd += C[GETI(i)]*xTd[i];

		int num_linesearch;
		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
		{
			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;

			for(int i=0; i<l; i++)
			{
				double exp_xTd = exp(xTd[i]);
				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
			}

			if(cond <= 0)
			{
				w_norm = w_norm_new;
				for(j=0; j<w_size; j++)
					w[j] = wpd[j];
				for(int i=0; i<l; i++)
				{
					exp_wTx[i] = exp_wTx_new[i];
					double tau_tmp = 1/(1+exp_wTx[i]);
					tau[i] = C[GETI(i)]*tau_tmp;
					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
				}
				break;
			}
			else
			{
				w_norm_new = 0;
				for(j=0; j<w_size; j++)
				{
					wpd[j] = (w[j]+wpd[j])*0.5;
					if(wpd[j] != 0)
						w_norm_new += fabs(wpd[j]);
				}
				delta *= 0.5;
				negsum_xTd *= 0.5;
				for(int i=0; i<l; i++)
					xTd[i] *= 0.5;
			}
		}

		// Recompute some info due to too many line search steps
		if(num_linesearch >= max_num_linesearch)
		{
			for(int i=0; i<l; i++)
				exp_wTx[i] = 0;

			for(int i=0; i<w_size; i++)
			{
				if(w[i]==0) continue;
				x = prob_col->x[i];
				while(x->index != -1)
				{
					exp_wTx[x->index-1] += w[i]*x->value;
					x++;
				}
			}

			for(int i=0; i<l; i++)
				exp_wTx[i] = exp(exp_wTx[i]);
		}

		if(iter == 1)
			inner_eps *= 0.25;

		newton_iter++;
		Gmax_old = Gmax_new;

		info("iter %3d  #CD cycles %d\n", newton_iter, iter);
	}

	info("=========================\n");
	info("optimization finished, #iter = %d\n", newton_iter);
	if(newton_iter >= max_newton_iter)
		info("WARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] Hdiag;
	delete [] Grad;
	delete [] wpd;
	delete [] xjneg_sum;
	delete [] xTd;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] tau;
	delete [] D;
}

// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	int nnz = 0;
	int *col_ptr = new int[n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new int[l];
	prob_col->x = new feature_node*[n];

	for(i=0; i<l; i++)
		prob_col->y[i] = prob->y[i];

	for(i=0; i<n+1; i++)
		col_ptr[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			nnz++;
			col_ptr[x->index]++;
			x++;
		}
	}
	for(i=1; i<n+1; i++)
		col_ptr[i] += col_ptr[i-1] + 1;

	x_space = new feature_node[nnz+n];
	for(i=0; i<n; i++)
		prob_col->x[i] = &x_space[col_ptr[i]];

	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x_space[col_ptr[ind]].index = i+1; // starts from 1
			x_space[col_ptr[ind]].value = x->value;
			col_ptr[ind]++;
			x++;
		}
	}
	for(i=0; i<n; i++)
		x_space[col_ptr[i]].index = -1;

	*x_space_ret = x_space;

	delete [] col_ptr;
}

// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}

static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	double eps=param->eps;
	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob->l;i++)
		if(prob->y[i]==+1)
			pos++;
	neg = prob->l - pos;

	function *fun_obj=NULL;
	switch(param->solver_type)
	{
		case L2R_LR:
		{
			fun_obj=new l2r_lr_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L2R_LR_DUAL:
			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
			break;
		default:
			fprintf(stderr, "Error: unknown solver_type\n");
			break;
	}
}

//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
{
	int i,j;
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	if(prob->bias>=0)
		model_->nr_feature=n-1;
	else
		model_->nr_feature=n;
	model_->param = *param;
	model_->bias = prob->bias;

	int nr_class;
	int *label = NULL;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int,l);

	// group training data of the same class
	group_classes(prob,&nr_class,&label,&start,&count,perm);

	model_->nr_class=nr_class;
	model_->label = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
		model_->label[i] = label[i];

	// calculate weighted C
	double *weighted_C = Malloc(double, nr_class);
	for(i=0;i<nr_class;i++)
		weighted_C[i] = param->C;
	for(i=0;i<param->nr_weight;i++)
	{
		for(j=0;j<nr_class;j++)
			if(param->weight_label[i] == label[j])
				break;
		if(j == nr_class)
			fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
		else
			weighted_C[j] *= param->weight[i];
	}

	// constructing the subproblem
	feature_node **x = Malloc(feature_node *,l);
	for(i=0;i<l;i++)
		x[i] = prob->x[perm[i]];

	int k;
	problem sub_prob;
	sub_prob.l = l;
	sub_prob.n = n;
	sub_prob.x = Malloc(feature_node *,sub_prob.l);
	sub_prob.y = Malloc(int,sub_prob.l);

	for(k=0; k<sub_prob.l; k++)
		sub_prob.x[k] = x[k];

	// multi-class svm by Crammer and Singer
	if(param->solver_type == MCSVM_CS)
	{
		model_->w=Malloc(double, n*nr_class);
		for(i=0;i<nr_class;i++)
			for(j=start[i];j<start[i]+count[i];j++)
				sub_prob.y[j] = i;
		Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
		Solver.Solve(model_->w);
	}
	else
	{
		if(nr_class == 2)
		{
			model_->w=Malloc(double, w_size);

			int e0 = start[0]+count[0];
			k=0;
			for(; k<e0; k++)
				sub_prob.y[k] = +1;
			for(; k<sub_prob.l; k++)
				sub_prob.y[k] = -1;

			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
		}
		else
		{
			model_->w=Malloc(double, w_size*nr_class);
			double *w=Malloc(double, w_size);
			for(i=0;i<nr_class;i++)
			{
				int si = start[i];
				int ei = si+count[i];

				k=0;
				for(; k<si; k++)
					sub_prob.y[k] = -1;
				for(; k<ei; k++)
					sub_prob.y[k] = +1;
				for(; k<sub_prob.l; k++)
					sub_prob.y[k] = -1;

				train_one(&sub_prob, param, w, weighted_C[i], param->C);

				for(int j=0;j<w_size;j++)
					model_->w[j*nr_class+i] = w[j];
			}
			free(w);
		}

	}

	free(x);
	free(label);
	free(start);
	free(count);
	free(perm);
	free(sub_prob.x);
	free(sub_prob.y);
	free(weighted_C);
	return model_;
}

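// A minimal usage sketch of the training interface above (not part of the
// library): builds a tiny two-instance problem by hand and trains a binary
// classifier. Feature indices are 1-based and every row ends with index = -1;
// all concrete values here are assumptions for illustration. Kept in #if 0.
#if 0
static void train_usage_sketch()
{
	feature_node x1[] = {{1,1.0},{2,0.5},{-1,0}};
	feature_node x2[] = {{1,-1.0},{2,2.0},{-1,0}};
	feature_node *rows[2] = {x1, x2};
	int labels[2] = {+1, -1};

	problem prob;
	prob.l = 2;          // number of instances
	prob.n = 2;          // number of features
	prob.bias = -1;      // no bias term
	prob.x = rows;
	prob.y = labels;

	parameter param;
	param.solver_type = L2R_L2LOSS_SVC_DUAL;
	param.C = 1;
	param.eps = 0.1;
	param.nr_weight = 0; // no per-class weights
	param.weight_label = NULL;
	param.weight = NULL;

	model *m = train(&prob, &param);
	info("prediction for x1: %d\n", predict(m, x1));
	free_and_destroy_model(&m);
}
#endif
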
void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
{
	int i;
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int *perm = Malloc(int,l);

	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct problem subprob;

		subprob.bias = prob->bias;
		subprob.n = prob->n;
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct feature_node*,subprob.l);
		subprob.y = Malloc(int,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct model *submodel = train(&subprob,param);
		for(j=begin;j<end;j++)
			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}

int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
	int idx;
	int n;
	if(model_->bias>=0)
		n=model_->nr_feature+1;
	else
		n=model_->nr_feature;
	double *w=model_->w;
	int nr_class=model_->nr_class;
	int i;
	int nr_w;
	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	const feature_node *lx=x;
	for(i=0;i<nr_w;i++)
		dec_values[i] = 0;
	for(; (idx=lx->index)!=-1; lx++)
	{
		// the dimension of testing data may exceed that of training
		if(idx<=n)
			for(i=0;i<nr_w;i++)
				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
	}

	if(nr_class==2)
		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
	else
	{
		int dec_max_idx = 0;
		for(i=1;i<nr_class;i++)
		{
			if(dec_values[i] > dec_values[dec_max_idx])
				dec_max_idx = i;
		}
		return model_->label[dec_max_idx];
	}
}

int predict(const model *model_, const feature_node *x)
{
	double *dec_values = Malloc(double, model_->nr_class);
	int label=predict_values(model_, x, dec_values);
	free(dec_values);
	return label;
}

int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
	if(check_probability_model(model_))
	{
		int i;
		int nr_class=model_->nr_class;
		int nr_w;
		if(nr_class==2)
			nr_w = 1;
		else
			nr_w = nr_class;

		int label=predict_values(model_, x, prob_estimates);
		for(i=0;i<nr_w;i++)
			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

		if(nr_class==2) // for binary classification
			prob_estimates[1]=1.-prob_estimates[0];
		else
		{
			double sum=0;
			for(i=0; i<nr_class; i++)
				sum+=prob_estimates[i];

			for(i=0; i<nr_class; i++)
				prob_estimates[i]=prob_estimates[i]/sum;
		}

		return label;
	}
	else
		return 0;
}

static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
	int i;
	int nr_feature=model_->nr_feature;
	int n;
	const parameter& param = model_->param;

	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	int nr_w;
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
	fprintf(fp, "nr_class %d\n", model_->nr_class);
	fprintf(fp, "label");
	for(i=0; i<model_->nr_class; i++)
		fprintf(fp, " %d", model_->label[i]);
	fprintf(fp, "\n");

	fprintf(fp, "nr_feature %d\n", nr_feature);

	fprintf(fp, "bias %.16g\n", model_->bias);

	fprintf(fp, "w\n");
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
		fprintf(fp, "\n");
	}

	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	else return 0;
}

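// For reference, a binary-class model file written by save_model above has
// this shape (the numeric values are illustrative only):
//
//   solver_type L2R_LR
//   nr_class 2
//   label 1 -1
//   nr_feature 3
//   bias -1
//   w
//   0.431
//   -1.562
//   0.05
//
// load_model below parses exactly these keywords and stops at "w".
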
struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;

	model_->label = NULL;

	char cmd[81];
	while(1)
	{
		fscanf(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			fscanf(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				free(model_->label);
				free(model_);
				return NULL;
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			fscanf(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			fscanf(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			fscanf(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				fscanf(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			free(model_);
			return NULL;
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if(nr_class==2 && param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
		fscanf(fp, "\n");
	}
	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

	return model_;
}

int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}

void free_model_content(struct model *model_ptr)
{
	if(model_ptr->w != NULL)
		free(model_ptr->w);
	if(model_ptr->label != NULL)
		free(model_ptr->label);
}

void free_and_destroy_model(struct model **model_ptr_ptr)
{
	struct model *model_ptr = *model_ptr_ptr;
	if(model_ptr != NULL)
	{
		free_model_content(model_ptr);
		free(model_ptr);
	}
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
		&& param->solver_type != MCSVM_CS
		&& param->solver_type != L1R_L2LOSS_SVC
		&& param->solver_type != L1R_LR
		&& param->solver_type != L2R_LR_DUAL)
		return "unknown solver type";

	return NULL;
}

int check_probability_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_LR ||
			model_->param.solver_type==L2R_LR_DUAL ||
			model_->param.solver_type==L1R_LR);
}

void set_print_string_function(void (*print_func)(const char*))
{
	if (print_func == NULL)
		liblinear_print_string = &print_string_stdout;
	else
		liblinear_print_string = print_func;
}