1 #include <stdlib.h>
2 #include <numpy/arrayobject.h>
3 #include "linear.h"
4
5 /*
6 * Convert matrix to sparse representation suitable for liblinear. x is
7 * expected to be an array of length n_samples*n_features.
8 *
9 * Whether the matrix is densely or sparsely populated, the fastest way to
10 * convert it to liblinear's sparse format is to calculate the amount of memory
11 * needed and allocate a single big block.
12 *
13 * Special care must be taken with indices, since liblinear indices start at 1
14 * and not at 0.
15 *
16 * If bias is > 0, we append an item at the end.
17 */
dense_to_sparse(char * x,int double_precision,int n_samples,int n_features,int n_nonzero,double bias)18 static struct feature_node **dense_to_sparse(char *x, int double_precision,
19 int n_samples, int n_features, int n_nonzero, double bias)
20 {
21 float *x32 = (float *)x;
22 double *x64 = (double *)x;
23 struct feature_node **sparse;
24 int i, j; /* number of nonzero elements in row i */
25 struct feature_node *T; /* pointer to the top of the stack */
26 int have_bias = (bias > 0);
27
28 sparse = malloc (n_samples * sizeof(struct feature_node *));
29 if (sparse == NULL)
30 return NULL;
31
32 n_nonzero += (have_bias+1) * n_samples;
33 T = malloc (n_nonzero * sizeof(struct feature_node));
34 if (T == NULL) {
35 free(sparse);
36 return NULL;
37 }
38
39 for (i=0; i<n_samples; ++i) {
40 sparse[i] = T;
41
42 for (j=1; j<=n_features; ++j) {
43 if (double_precision) {
44 if (*x64 != 0) {
45 T->value = *x64;
46 T->index = j;
47 ++ T;
48 }
49 ++ x64; /* go to next element */
50 } else {
51 if (*x32 != 0) {
52 T->value = *x32;
53 T->index = j;
54 ++ T;
55 }
56 ++ x32; /* go to next element */
57 }
58 }
59
60 /* set bias element */
61 if (have_bias) {
62 T->value = bias;
63 T->index = j;
64 ++ T;
65 }
66
67 /* set sentinel */
68 T->index = -1;
69 ++ T;
70 }
71
72 return sparse;
73 }
74
75
76 /*
77 * Convert scipy.sparse.csr to liblinear's sparse data structure
78 */
csr_to_sparse(char * x,int double_precision,int * indices,int * indptr,int n_samples,int n_features,int n_nonzero,double bias)79 static struct feature_node **csr_to_sparse(char *x, int double_precision,
80 int *indices, int *indptr, int n_samples, int n_features, int n_nonzero,
81 double bias)
82 {
83 float *x32 = (float *)x;
84 double *x64 = (double *)x;
85 struct feature_node **sparse;
86 int i, j=0, k=0, n;
87 struct feature_node *T;
88 int have_bias = (bias > 0);
89
90 sparse = malloc (n_samples * sizeof(struct feature_node *));
91 if (sparse == NULL)
92 return NULL;
93
94 n_nonzero += (have_bias+1) * n_samples;
95 T = malloc (n_nonzero * sizeof(struct feature_node));
96 if (T == NULL) {
97 free(sparse);
98 return NULL;
99 }
100
101 for (i=0; i<n_samples; ++i) {
102 sparse[i] = T;
103 n = indptr[i+1] - indptr[i]; /* count elements in row i */
104
105 for (j=0; j<n; ++j) {
106 T->value = double_precision ? x64[k] : x32[k];
107 T->index = indices[k] + 1; /* liblinear uses 1-based indexing */
108 ++T;
109 ++k;
110 }
111
112 if (have_bias) {
113 T->value = bias;
114 T->index = n_features + 1;
115 ++T;
116 ++j;
117 }
118
119 /* set sentinel */
120 T->index = -1;
121 ++T;
122 }
123
124 return sparse;
125 }
126
set_problem(char * X,int double_precision_X,int n_samples,int n_features,int n_nonzero,double bias,char * sample_weight,char * Y)127 struct problem * set_problem(char *X, int double_precision_X, int n_samples,
128 int n_features, int n_nonzero, double bias, char* sample_weight,
129 char *Y)
130 {
131 struct problem *problem;
132 /* not performant but simple */
133 problem = malloc(sizeof(struct problem));
134 if (problem == NULL) return NULL;
135 problem->l = n_samples;
136 problem->n = n_features + (bias > 0);
137 problem->y = (double *) Y;
138 problem->W = (double *) sample_weight;
139 problem->x = dense_to_sparse(X, double_precision_X, n_samples, n_features,
140 n_nonzero, bias);
141 problem->bias = bias;
142
143 if (problem->x == NULL) {
144 free(problem);
145 return NULL;
146 }
147
148 return problem;
149 }
150
csr_set_problem(char * X,int double_precision_X,char * indices,char * indptr,int n_samples,int n_features,int n_nonzero,double bias,char * sample_weight,char * Y)151 struct problem * csr_set_problem (char *X, int double_precision_X,
152 char *indices, char *indptr, int n_samples, int n_features,
153 int n_nonzero, double bias, char *sample_weight, char *Y)
154 {
155 struct problem *problem;
156 problem = malloc (sizeof (struct problem));
157 if (problem == NULL) return NULL;
158 problem->l = n_samples;
159 problem->n = n_features + (bias > 0);
160 problem->y = (double *) Y;
161 problem->W = (double *) sample_weight;
162 problem->x = csr_to_sparse(X, double_precision_X, (int *) indices,
163 (int *) indptr, n_samples, n_features, n_nonzero, bias);
164 problem->bias = bias;
165
166 if (problem->x == NULL) {
167 free(problem);
168 return NULL;
169 }
170
171 return problem;
172 }
173
174
175 /* Create a parameter struct with and return it */
set_parameter(int solver_type,double eps,double C,npy_intp nr_weight,char * weight_label,char * weight,int max_iter,unsigned seed,double epsilon)176 struct parameter *set_parameter(int solver_type, double eps, double C,
177 npy_intp nr_weight, char *weight_label,
178 char *weight, int max_iter, unsigned seed,
179 double epsilon)
180 {
181 struct parameter *param = malloc(sizeof(struct parameter));
182 if (param == NULL)
183 return NULL;
184
185 set_seed(seed);
186 param->solver_type = solver_type;
187 param->eps = eps;
188 param->C = C;
189 param->p = epsilon; // epsilon for epsilon-SVR
190 param->nr_weight = (int) nr_weight;
191 param->weight_label = (int *) weight_label;
192 param->weight = (double *) weight;
193 param->max_iter = max_iter;
194 return param;
195 }
196
copy_w(void * data,struct model * model,int len)197 void copy_w(void *data, struct model *model, int len)
198 {
199 memcpy(data, model->w, len * sizeof(double));
200 }
201
get_bias(struct model * model)202 double get_bias(struct model *model)
203 {
204 return model->bias;
205 }
206
free_problem(struct problem * problem)207 void free_problem(struct problem *problem)
208 {
209 free(problem->x[0]);
210 free(problem->x);
211 free(problem);
212 }
213
free_parameter(struct parameter * param)214 void free_parameter(struct parameter *param)
215 {
216 free(param);
217 }
218
219 /* rely on built-in facility to control verbose output */
print_null(const char * s)220 static void print_null(const char *s) {}
221
print_string_stdout(const char * s)222 static void print_string_stdout(const char *s)
223 {
224 fputs(s ,stdout);
225 fflush(stdout);
226 }
227
228 /* provide convenience wrapper */
set_verbosity(int verbosity_flag)229 void set_verbosity(int verbosity_flag){
230 if (verbosity_flag)
231 set_print_string_function(&print_string_stdout);
232 else
233 set_print_string_function(&print_null);
234 }
235