1 /************************************************************************/
2 /*                                                                      */
3 /*   svm_common.h                                                       */
4 /*                                                                      */
5 /*   Definitions and functions used in both svm_learn and svm_classify. */
6 /*                                                                      */
7 /*   Author: Thorsten Joachims                                          */
8 /*   Date: 02.07.02                                                     */
9 /*                                                                      */
10 /*   Copyright (c) 2002  Thorsten Joachims - All rights reserved        */
11 /*                                                                      */
12 /*   This software is available for non-commercial use only. It must    */
13 /*   not be modified and distributed without prior permission of the    */
14 /*   author. The author is not responsible for implications from the    */
15 /*   use of this software.                                              */
16 /*                                                                      */
17 /************************************************************************/
18 
19 /* # define MICROSOFT */  /* uncomment, if compiling with Visual C++ */
20 
21 # define MAXSHRINK 50000    /* maximum number of shrinking rounds */
22 
23 # include <stdio.h>
24 # include <ctype.h>
25 # include <math.h>
26 # include <string.h>
27 # include <stdlib.h>
28 # include <time.h>
29 # include <float.h>
30 
/* JL: changed VERSION      to VERSION_SVMLIGHT      and
 *             VERSION_DATE to VERSION_DATE_SVMLIGHT
 * to avoid conflicts with afni
 */
35 # define VERSION_SVMLIGHT       "V5.00"
36 # define VERSION_DATE_SVMLIGHT  "30.06.02"
37 
38 # define CFLOAT  float       /* the type of float to use for caching */
39                              /* kernel evaluations. Using float saves */
40                              /* us some memory, but you can use double, too */
41 # define FNUM    long        /* the type used for storing feature ids */
42 # define FVAL    float       /* the type used for storing feature values */
43 
44 # define LINEAR  0           /* linear kernel type */
# define POLY    1           /* polynomial kernel type */
46 # define RBF     2           /* rbf kernel type */
47 # define SIGMOID 3           /* sigmoid kernel type */
48 
49 # define CLASSIFICATION 1    /* train classification model */
50 # define REGRESSION     2    /* train regression model */
51 # define RANKING        3    /* train ranking model */
52 
/* A single (feature id, value) pair.  Sparse vectors (see DOC) are
   arrays of WORDs ordered by increasing wnum. */
typedef struct word {
  FNUM    wnum;	               /* word number (feature id) */
  FVAL    weight;              /* word weight (feature value) */
} WORD;
57 
/* One training/test example: a sparse feature vector plus the
   per-example metadata used during learning. */
typedef struct doc {
  long    docnum;              /* Document ID. This has to be the position of
                                  the document in the training set array. */
  long    queryid;             /* for learning rankings, constraints are
				  generated for documents with the same
				  queryID. */
  double  costfactor;          /* Scales the cost of misclassifying this
				  document by this factor. The effect of this
				  value is, that the upper bound on the alpha
				  for this example is scaled by this factor.
				  The factors are set by the feature
				  'cost:<val>' in the training data. */
  double  twonorm_sq;          /* The squared Euclidean length of the document
                                  vector. */
  WORD    *words;              /* The words/values in the document by
				  increasing word-number. */
} DOC;
75 
/* All user-settable and internal parameters controlling training
   (set from the command line / defaults in svm_learn). */
typedef struct learn_parm {
  long   type;                 /* selects between regression and
				  classification (CLASSIFICATION /
				  REGRESSION / RANKING above) */
  double svm_c;                /* upper bound C on alphas */
  double eps;                  /* regression epsilon (eps=1.0 for
				  classification) */
  double svm_costratio;        /* factor to multiply C for positive examples */
  double transduction_posratio;/* fraction of unlabeled examples to be */
                               /* classified as positives */
  long   biased_hyperplane;    /* if nonzero, use hyperplane w*x+b=0
				  otherwise w*x=0 */
  long   svm_maxqpsize;        /* size q of working set */
  long   svm_newvarsinqp;      /* new variables to enter the working set
				  in each iteration */
  double epsilon_crit;         /* tolerable error for distances used
				  in stopping criterion */
  double epsilon_shrink;       /* how much a multiplier should be above
				  zero for shrinking */
  long   svm_iter_to_shrink;   /* iterations h after which an example can
				  be removed by shrinking */
  long   remove_inconsistent;  /* exclude examples with alpha at C and
				  retrain */
  long   skip_final_opt_check; /* do not check KT-Conditions at the end of
				  optimization for examples removed by
				  shrinking. WARNING: This might lead to
				  sub-optimal solutions! */
  long   compute_loo;          /* if nonzero, computes leave-one-out
				  estimates */
  double rho;                  /* parameter in xi/alpha-estimates and for
				  pruning leave-one-out range [1..2] */
  long   xa_depth;             /* parameter in xi/alpha-estimates upper
				  bounding the number of SV the current
				  alpha_t is distributed over */
  char predfile[200];          /* file for predictions on unlabeled examples
				  in transduction */
  char alphafile[200];         /* file to store optimal alphas in. use
				  empty string if alphas should not be
				  output */

  /* you probably do not want to touch the following */
  double epsilon_const;        /* tolerable error on eq-constraint */
  double epsilon_a;            /* tolerable error on alphas at bounds */
  double opt_precision;        /* precision of solver, set to e.g. 1e-21
				  if you get convergence problems */

  /* the following are only for internal use */
  long   svm_c_steps;          /* do so many steps for finding optimal C */
  double svm_c_factor;         /* increase C by this factor every step */
  double svm_costratio_unlab;  /* cost ratio for unlabeled examples
				  (transduction) -- see svm_learn */
  double svm_unlabbound;       /* bound for unlabeled examples
				  (transduction) -- see svm_learn */
  double *svm_cost;            /* individual upper bounds for each var */
  long   totwords;             /* number of features */

  /* JL July 2011: Added maximum number of iterations */
  long max_iterations;

} LEARN_PARM;
133 
/* Kernel selection and its parameters (used by kernel() below).
   Field meanings below are name-derived -- see the kernel()
   implementation for the exact formulas. */
typedef struct kernel_parm {
  long    kernel_type;   /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */
  long    poly_degree;   /* degree d for the POLY kernel */
  double  rbf_gamma;     /* gamma for the RBF kernel */
  double  coef_lin;      /* linear coefficient in poly/sigmoid kernels */
  double  coef_const;    /* constant term in poly/sigmoid kernels */
  char    custom[50];    /* for user supplied kernel */
} KERNEL_PARM;
142 
/* A trained SVM model: support vectors, their coefficients, and the
   kernel that was used.  Read/written by read_model() (and presumably
   a matching writer in svm_learn). */
typedef struct model {
  long    sv_num;          /* number of support vectors (see svm_learn for
			      the exact indexing convention) */
  long    at_upper_bound;  /* count of alphas at the upper bound C --
			      presumably; confirm in svm_learn */
  double  b;               /* threshold of the decision function (sign
			      convention in classify_example) */
  DOC     **supvec;        /* the support vector documents */
  double  *alpha;          /* coefficient for each support vector */
  long    *index;       /* index from docnum to position in model */
  long    totwords;     /* number of features */
  long    totdoc;       /* number of training documents */
  KERNEL_PARM kernel_parm; /* kernel */

  /* the following values are not written to file */
  double  loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
  double  xa_error,xa_recall,xa_precision;    /* xi/alpha estimates */
  double  *lin_weights;                       /* weights for linear case using
						 folding */
} MODEL;
160 
/* Specification of one quadratic sub-problem handed to the QP solver:
   minimize 0.5*x'*G*x + g0'*x subject to the equality and box
   constraints below. */
typedef struct quadratic_program {
  long   opt_n;            /* number of variables */
  long   opt_m;            /* number of linear equality constraints */
  double *opt_ce,*opt_ce0; /* linear equality constraints */
  double *opt_g;           /* hessian of objective */
  double *opt_g0;          /* linear part of objective */
  double *opt_xinit;       /* initial value for variables */
  double *opt_low,*opt_up; /* box constraints */
} QP;
170 
/* Cache of kernel evaluations, stored as CFLOAT to save memory.
   Field comments below are name-derived; see the cache routines in
   the implementation file for exact semantics. */
typedef struct kernel_cache {
  long   *index;  /* cache some kernel evaluations */
  CFLOAT *buffer; /* to improve speed */
  long   *invindex;       /* inverse of 'index' -- presumably */
  long   *active2totdoc;  /* map active-set position -> document id */
  long   *totdoc2active;  /* map document id -> active-set position */
  long   *lru;            /* least-recently-used bookkeeping */
  long   *occu;           /* occupancy flags/counters -- confirm */
  long   elems;           /* number of cached elements */
  long   max_elems;       /* capacity limit */
  long   time;            /* logical clock for LRU aging -- presumably */
  long   activenum;       /* number of currently active examples */
  long   buffsize;        /* size of 'buffer' */
} KERNEL_CACHE;
185 
186 
/* Accumulated time spent in the major phases of training
   (units are whatever get_runtime() returns). */
typedef struct timing_profile {
  long   time_kernel;   /* kernel evaluations */
  long   time_opti;     /* QP optimization */
  long   time_shrink;   /* shrinking */
  long   time_update;   /* updating gradients/linear part -- presumably */
  long   time_model;    /* model construction */
  long   time_check;    /* final KT-condition checking -- presumably */
  long   time_select;   /* working-set selection */
} TIMING;
196 
/* Bookkeeping for the shrinking heuristic (temporarily removing
   examples from optimization, up to MAXSHRINK rounds). */
typedef struct shrink_state {
  long   *active;          /* per-example flag: still in the active set */
  long   *inactive_since;  /* iteration at which an example was shrunk */
  long   deactnum;         /* number of shrinking rounds so far -- confirm */
  double **a_history;  /* for shrinking with non-linear kernel */
  long   maxhistory;   /* capacity of a_history */
  double *last_a;      /* for shrinking with linear kernel */
  double *last_lin;    /* for shrinking with linear kernel */
} SHRINK_STATE;
206 
/* Routines shared by svm_learn and svm_classify (see file header).
   The comments below are derived from names and signatures only --
   consult the implementations for the exact contracts.
   Naming convention: _ss = sparse,sparse args; _ns = dense(n),sparse. */
double classify_example(MODEL *, DOC *);           /* decision value, kernel case */
double classify_example_linear(MODEL *, DOC *);    /* decision value via lin_weights */
CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);        /* kernel evaluation k(a,b) */
double custom_kernel(KERNEL_PARM *, DOC *, DOC *); /* user-supplied kernel hook */
double sprod_ss(WORD *, WORD *);                   /* sparse . sparse inner product */
WORD*  sub_ss(WORD *, WORD *);                     /* sparse vector difference */
double model_length_s(MODEL *, KERNEL_PARM *);     /* length of weight vector w */
void   clear_vector_n(double *, long);             /* zero a dense vector */
void   add_vector_ns(double *, WORD *, double);    /* dense += factor * sparse */
double sprod_ns(double *, WORD *);                 /* dense . sparse inner product */
void   add_weight_vector_to_linear_model(MODEL *); /* fill lin_weights ("folding") */
void   read_model(char *, MODEL *, long, long);    /* load model from file */
void   read_documents(char *, DOC *, double *, long, long, long *, long *);
                                                   /* load training examples */
int    parse_document(char *, DOC *, double *, long *, long);
                                                   /* parse one example line */
void   nol_ll(char *, long *, long *, long *);     /* pre-scan file for sizes --
						      presumably lines/words/length */
long   minl(long, long);                           /* min of two longs */
long   maxl(long, long);                           /* max of two longs */
long   get_runtime(void);                          /* elapsed runtime for TIMING */
void   *my_malloc(size_t);                         /* malloc wrapper (presumably
						      aborts on failure -- confirm) */
void   copyright_notice(void);                     /* print copyright banner */
# ifdef MICROSOFT
   int isnan(double);       /* Visual C++ lacked C99 isnan() */
# endif
230 
/* NOTE(review): file-scope 'static' in a header gives EVERY .c file that
   includes it its own private copy of 'verbosity' -- an assignment in one
   translation unit is not visible in another.  This may be intentional
   (avoiding multiple-definition link errors, per the JL/AFNI changes
   above); confirm before changing to 'extern'. */
static long   verbosity;              /* verbosity level (0-4) */
extern long   kernel_cache_statistic; /* defined in one of the .c files --
					 counts cache hits/misses, presumably */
233 
234 
235