1 #include <stdlib.h>
2 #include <numpy/arrayobject.h>
3 #include "linear.h"
4 
5 /*
6  * Convert matrix to sparse representation suitable for liblinear. x is
7  * expected to be an array of length n_samples*n_features.
8  *
9  * Whether the matrix is densely or sparsely populated, the fastest way to
10  * convert it to liblinear's sparse format is to calculate the amount of memory
11  * needed and allocate a single big block.
12  *
13  * Special care must be taken with indices, since liblinear indices start at 1
14  * and not at 0.
15  *
16  * If bias is > 0, we append an item at the end.
17  */
dense_to_sparse(char * x,int double_precision,int n_samples,int n_features,int n_nonzero,double bias)18 static struct feature_node **dense_to_sparse(char *x, int double_precision,
19         int n_samples, int n_features, int n_nonzero, double bias)
20 {
21     float *x32 = (float *)x;
22     double *x64 = (double *)x;
23     struct feature_node **sparse;
24     int i, j;                           /* number of nonzero elements in row i */
25     struct feature_node *T;             /* pointer to the top of the stack */
26     int have_bias = (bias > 0);
27 
28     sparse = malloc (n_samples * sizeof(struct feature_node *));
29     if (sparse == NULL)
30         return NULL;
31 
32     n_nonzero += (have_bias+1) * n_samples;
33     T = malloc (n_nonzero * sizeof(struct feature_node));
34     if (T == NULL) {
35         free(sparse);
36         return NULL;
37     }
38 
39     for (i=0; i<n_samples; ++i) {
40         sparse[i] = T;
41 
42         for (j=1; j<=n_features; ++j) {
43             if (double_precision) {
44                 if (*x64 != 0) {
45                     T->value = *x64;
46                     T->index = j;
47                     ++ T;
48                 }
49                 ++ x64; /* go to next element */
50             } else {
51                 if (*x32 != 0) {
52                     T->value = *x32;
53                     T->index = j;
54                     ++ T;
55                 }
56                 ++ x32; /* go to next element */
57             }
58         }
59 
60         /* set bias element */
61         if (have_bias) {
62                 T->value = bias;
63                 T->index = j;
64                 ++ T;
65             }
66 
67         /* set sentinel */
68         T->index = -1;
69         ++ T;
70     }
71 
72     return sparse;
73 }
74 
75 
76 /*
77  * Convert scipy.sparse.csr to liblinear's sparse data structure
78  */
csr_to_sparse(char * x,int double_precision,int * indices,int * indptr,int n_samples,int n_features,int n_nonzero,double bias)79 static struct feature_node **csr_to_sparse(char *x, int double_precision,
80         int *indices, int *indptr, int n_samples, int n_features, int n_nonzero,
81         double bias)
82 {
83     float *x32 = (float *)x;
84     double *x64 = (double *)x;
85     struct feature_node **sparse;
86     int i, j=0, k=0, n;
87     struct feature_node *T;
88     int have_bias = (bias > 0);
89 
90     sparse = malloc (n_samples * sizeof(struct feature_node *));
91     if (sparse == NULL)
92         return NULL;
93 
94     n_nonzero += (have_bias+1) * n_samples;
95     T = malloc (n_nonzero * sizeof(struct feature_node));
96     if (T == NULL) {
97         free(sparse);
98         return NULL;
99     }
100 
101     for (i=0; i<n_samples; ++i) {
102         sparse[i] = T;
103         n = indptr[i+1] - indptr[i]; /* count elements in row i */
104 
105         for (j=0; j<n; ++j) {
106             T->value = double_precision ? x64[k] : x32[k];
107             T->index = indices[k] + 1; /* liblinear uses 1-based indexing */
108             ++T;
109             ++k;
110         }
111 
112         if (have_bias) {
113             T->value = bias;
114             T->index = n_features + 1;
115             ++T;
116             ++j;
117         }
118 
119         /* set sentinel */
120         T->index = -1;
121         ++T;
122     }
123 
124     return sparse;
125 }
126 
set_problem(char * X,int double_precision_X,int n_samples,int n_features,int n_nonzero,double bias,char * sample_weight,char * Y)127 struct problem * set_problem(char *X, int double_precision_X, int n_samples,
128         int n_features, int n_nonzero, double bias, char* sample_weight,
129         char *Y)
130 {
131     struct problem *problem;
132     /* not performant but simple */
133     problem = malloc(sizeof(struct problem));
134     if (problem == NULL) return NULL;
135     problem->l = n_samples;
136     problem->n = n_features + (bias > 0);
137     problem->y = (double *) Y;
138     problem->W = (double *) sample_weight;
139     problem->x = dense_to_sparse(X, double_precision_X, n_samples, n_features,
140                         n_nonzero, bias);
141     problem->bias = bias;
142 
143     if (problem->x == NULL) {
144         free(problem);
145         return NULL;
146     }
147 
148     return problem;
149 }
150 
csr_set_problem(char * X,int double_precision_X,char * indices,char * indptr,int n_samples,int n_features,int n_nonzero,double bias,char * sample_weight,char * Y)151 struct problem * csr_set_problem (char *X, int double_precision_X,
152         char *indices, char *indptr, int n_samples, int n_features,
153         int n_nonzero, double bias, char *sample_weight, char *Y)
154 {
155     struct problem *problem;
156     problem = malloc (sizeof (struct problem));
157     if (problem == NULL) return NULL;
158     problem->l = n_samples;
159     problem->n = n_features + (bias > 0);
160     problem->y = (double *) Y;
161     problem->W = (double *) sample_weight;
162     problem->x = csr_to_sparse(X, double_precision_X, (int *) indices,
163                         (int *) indptr, n_samples, n_features, n_nonzero, bias);
164     problem->bias = bias;
165 
166     if (problem->x == NULL) {
167         free(problem);
168         return NULL;
169     }
170 
171     return problem;
172 }
173 
174 
175 /* Create a parameter struct with and return it */
set_parameter(int solver_type,double eps,double C,npy_intp nr_weight,char * weight_label,char * weight,int max_iter,unsigned seed,double epsilon)176 struct parameter *set_parameter(int solver_type, double eps, double C,
177                                 npy_intp nr_weight, char *weight_label,
178                                 char *weight, int max_iter, unsigned seed,
179                                 double epsilon)
180 {
181     struct parameter *param = malloc(sizeof(struct parameter));
182     if (param == NULL)
183         return NULL;
184 
185     set_seed(seed);
186     param->solver_type = solver_type;
187     param->eps = eps;
188     param->C = C;
189     param->p = epsilon;  // epsilon for epsilon-SVR
190     param->nr_weight = (int) nr_weight;
191     param->weight_label = (int *) weight_label;
192     param->weight = (double *) weight;
193     param->max_iter = max_iter;
194     return param;
195 }
196 
copy_w(void * data,struct model * model,int len)197 void copy_w(void *data, struct model *model, int len)
198 {
199     memcpy(data, model->w, len * sizeof(double));
200 }
201 
get_bias(struct model * model)202 double get_bias(struct model *model)
203 {
204     return model->bias;
205 }
206 
free_problem(struct problem * problem)207 void free_problem(struct problem *problem)
208 {
209     free(problem->x[0]);
210     free(problem->x);
211     free(problem);
212 }
213 
free_parameter(struct parameter * param)214 void free_parameter(struct parameter *param)
215 {
216     free(param);
217 }
218 
219 /* rely on built-in facility to control verbose output */
print_null(const char * s)220 static void print_null(const char *s) {}
221 
print_string_stdout(const char * s)222 static void print_string_stdout(const char *s)
223 {
224     fputs(s ,stdout);
225     fflush(stdout);
226 }
227 
228 /* provide convenience wrapper */
set_verbosity(int verbosity_flag)229 void set_verbosity(int verbosity_flag){
230     if (verbosity_flag)
231         set_print_string_function(&print_string_stdout);
232     else
233         set_print_string_function(&print_null);
234 }
235