1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2017 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* gretl interface to libsvm for machine learning */
21
22 #include "libgretl.h"
23 #include "libset.h"
24 #include "gretl_mt.h"
25 #include "version.h"
26
27 #ifdef HAVE_MPI
28 # include "gretl_mpi.h"
29 # include "gretl_foreign.h"
30 # include "matrix_extra.h"
31 #endif
32
33 #ifdef _OPENMP
34 # include <omp.h>
35 #endif
36
37 #include "svmlib.h"
38
39 #define DATA_DEBUG 0
40
41 typedef struct svm_problem sv_data;
42 typedef struct svm_node sv_cell;
43 typedef struct svm_model sv_model;
44 typedef struct svm_parameter sv_parm;
45 typedef struct sv_wrapper_ sv_wrapper;
46 typedef struct sv_grid_ sv_grid;
47 typedef struct grid_row_ grid_row;
48
49 /* the following need to be in sync with svmlib.h */
50
51 static const char *svm_type_names[] = {
52 "C-SVC", "nu-SVC", "one-class", "eps-SVR", "nu-SVR",
53 "C-rnk", NULL
54 };
55
56 static const char *kernel_type_names[] = {
57 "linear", "polynomial", "RBF", "sigmoid",
58 "stump", "perc", "laplace", "expo", NULL
59 };
60
61 enum {
62 W_SAVEMOD = 1 << 0, /* saving a model? */
63 W_LOADMOD = 1 << 1, /* loading a model? */
64 W_QUIET = 1 << 2, /* quiet operation? */
65 W_SEARCH = 1 << 3, /* doing parameter search? */
66 W_STDFMT = 1 << 4, /* use plain libsvm format for ranges? */
67 W_SVPARM = 1 << 5, /* saving tuned params to bundle? */
68 W_FOLDVAR = 1 << 6, /* caller-supplied folds variable? */
69 W_YSCALE = 1 << 7, /* scaling the dependent var? */
70 W_CONSEC = 1 << 8, /* using consecutive folds? */
71 W_REFOLD = 1 << 9, /* folds differ across x-validation calls? */
72 W_INTDEP = 1 << 10, /* dependent variable is integer-valued */
73 W_NOTRAIN = 1 << 11, /* not doing any training */
74 W_MPI = 1 << 12 /* use MPI for cross validation */
75 };
76
77 enum {
78 REG_MSE = 1,
79 REG_MAD,
80 REG_ROUND_MAD,
81 REG_ROUND_MISS
82 };
83
84 struct sv_wrapper_ {
85 int auto_type;
86 int flags;
87 int scaling;
88 int t1;
89 int t2;
90 int t2_train;
91 int k;
92 int nfold; /* number of folds for x-validation */
93 int predict; /* are we doing (how much) prediction? */
94 int nproc; /* number of processes (MPI only) */
95 int rank; /* "this" MPI rank, if applicable */
96 int regcrit; /* regression optimality criterion */
97 int do_probs; /* are we doing probability estimation? */
98 double ymin; /* min value of dependent variable */
99 double ymax; /* max value of dependent variable */
100 gretl_matrix *ranges;
101 char *ranges_outfile;
102 char *ranges_infile;
103 char *model_outfile;
104 char *model_infile;
105 char *data_outfile;
106 char foldname[VNAMELEN];
107 sv_grid *grid;
108 char *plot;
109 gretl_matrix *xdata;
110 gretl_matrix *Ptrain;
111 gretl_matrix *Ptest;
112 double svr_sigma;
113 int *flist; /* array of folds IDs, if applicable */
114 int *fsize; /* array of fold sizes, if applicable */
115 unsigned seed;
116 };
117
118 struct sv_parm_info {
119 const char *key;
120 GretlType type;
121 };
122
123 enum {
124 G_C,
125 G_g,
126 G_p
127 };
128
129 struct grid_row_ {
130 double start;
131 double stop;
132 double step;
133 };
134
135 struct sv_grid_ {
136 grid_row row[3];
137 int null[3];
138 int n[3];
139 int linear[3];
140 };
141
142 #define N_PARMS 15
143
144 #define doing_regression(p) (p->svm_type == EPSILON_SVR || p->svm_type == NU_SVR)
145
146 static int gui_mode;
147
148 #ifdef HAVE_MPI
149 static PRN *worker_prn;
150 #endif
151
sv_wrapper_init(sv_wrapper * w,const DATASET * dset)152 static void sv_wrapper_init (sv_wrapper *w, const DATASET *dset)
153 {
154 w->auto_type = EPSILON_SVR;
155 w->flags = 0;
156 w->scaling = 1;
157 w->t1 = dset->t1;
158 w->t2 = dset->t2;
159 w->t2_train = 0;
160 w->k = 0;
161 w->nfold = 0;
162 w->predict = 2;
163 w->nproc = 0;
164 w->rank = -1;
165 w->regcrit = 0;
166 w->do_probs = 0;
167 w->ymin = 0.0;
168 w->ymax = 0.0;
169 w->ranges = NULL;
170 w->ranges_outfile = NULL;
171 w->ranges_infile = NULL;
172 w->model_outfile = NULL;
173 w->model_infile = NULL;
174 w->data_outfile = NULL;
175 w->foldname[0] = '\0';
176 w->grid = NULL;
177 w->plot = NULL;
178 w->xdata = NULL;
179 w->Ptrain = NULL;
180 w->Ptest = NULL;
181 w->svr_sigma = NADBL;
182 w->flist = NULL;
183 w->fsize = NULL;
184 w->seed = time(NULL);
185 }
186
sv_wrapper_free(sv_wrapper * w)187 static void sv_wrapper_free (sv_wrapper *w)
188 {
189 gretl_matrix_free(w->ranges);
190 free(w->ranges_outfile);
191 free(w->ranges_infile);
192 free(w->model_outfile);
193 free(w->model_infile);
194 free(w->data_outfile);
195 free(w->grid);
196 free(w->plot);
197 gretl_matrix_free(w->xdata);
198 gretl_matrix_free(w->Ptrain);
199 gretl_matrix_free(w->Ptest);
200 free(w->flist);
201 free(w->fsize);
202 }
203
maybe_set_svm_seed(const sv_wrapper * w)204 static void maybe_set_svm_seed (const sv_wrapper *w)
205 {
206 if (w->flags & W_REFOLD) {
207 /* set the seed just once */
208 static int set;
209
210 if (!set) {
211 gretl_alt_rand_set_seed(w->seed);
212 set = 1;
213 }
214 } else {
215 /* set the seed on each x-validation run */
216 gretl_alt_rand_set_seed(w->seed);
217 }
218 }
219
loading_model(const sv_wrapper * w)220 static int loading_model (const sv_wrapper *w)
221 {
222 return (w->flags & W_LOADMOD) || w->model_infile != NULL;
223 }
224
saving_model(const sv_wrapper * w)225 static int saving_model (const sv_wrapper *w)
226 {
227 return (w->flags & W_SAVEMOD) || w->model_outfile != NULL;
228 }
229
uses_epsilon(const sv_parm * parm)230 static int uses_epsilon (const sv_parm *parm)
231 {
232 return parm->svm_type == EPSILON_SVR;
233 }
234
uses_gamma(const sv_parm * parm)235 static int uses_gamma (const sv_parm *parm)
236 {
237 return parm->kernel_type == POLY ||
238 parm->kernel_type == RBF ||
239 parm->kernel_type == SIGMOID ||
240 parm->kernel_type == LAPLACE ||
241 parm->kernel_type == EXPO;
242 }
243
uses_coef0(const sv_parm * parm)244 static int uses_coef0 (const sv_parm *parm)
245 {
246 return parm->kernel_type == POLY ||
247 parm->kernel_type == SIGMOID ||
248 parm->kernel_type == STUMP ||
249 parm->kernel_type == PERC;
250 }
251
uses_nu(const sv_parm * parm)252 static int uses_nu (const sv_parm *parm)
253 {
254 return parm->svm_type == NU_SVC ||
255 parm->svm_type == NU_SVR || parm->svm_type == ONE_CLASS;
256 }
257
set_sv_parm_defaults(sv_parm * parm)258 static void set_sv_parm_defaults (sv_parm *parm)
259 {
260 parm->svm_type = -1; /* mark as unknown for now */
261 parm->kernel_type = RBF;
262 parm->degree = 3; /* for polynomial */
263 parm->gamma = 0; /* poly/RBF/sigmoid: default 1.0 / num_features */
264 parm->coef0 = 0; /* for use in kernel function */
265
266 /* training-only variables */
267 parm->cache_size = 1024; /* cache size in MB */
268 parm->eps = 0.001; /* stopping criterion */
269 parm->C = 1; /* cost: for C_SVC, EPSILON_SVR and NU_SVR */
270 parm->nr_weight = 0; /* for C_SVC */
271 parm->weight_label = NULL; /* for C_SVC */
272 parm->weight = NULL; /* for C_SVC */
273 parm->nu = 0.5; /* for NU_SVC, ONE_CLASS, and NU_SVR */
274 parm->p = 0.1; /* for EPSILON_SVR */
275 parm->shrinking = 1; /* use shrinking heuristics */
276 parm->probability = 0; /* do probability estimates */
277 }
278
279 static struct sv_parm_info pinfo[N_PARMS] = {
280 { "svm_type", GRETL_TYPE_INT },
281 { "kernel_type", GRETL_TYPE_INT },
282 { "degree", GRETL_TYPE_INT },
283 { "gamma", GRETL_TYPE_DOUBLE },
284 { "coef0", GRETL_TYPE_DOUBLE },
285 { "cachesize", GRETL_TYPE_DOUBLE },
286 { "toler", GRETL_TYPE_DOUBLE },
287 { "C", GRETL_TYPE_DOUBLE },
288 { "nr_weight", GRETL_TYPE_INT },
289 { "weight_label", GRETL_TYPE_SERIES },
290 { "weight", GRETL_TYPE_SERIES },
291 { "nu", GRETL_TYPE_DOUBLE },
292 { "epsilon", GRETL_TYPE_DOUBLE },
293 { "shrinking", GRETL_TYPE_BOOL },
294 { "probability", GRETL_TYPE_BOOL }
295 };
296
is_sv_parm(const char * s)297 static int is_sv_parm (const char *s)
298 {
299 int i;
300
301 for (i=0; i<N_PARMS; i++) {
302 if (!strcmp(s, pinfo[i].key)) {
303 return 1;
304 }
305 }
306
307 return 0;
308 }
309
type_value_from_string(const char * s,int i,PRN * prn,int * err)310 static int type_value_from_string (const char *s, int i,
311 PRN *prn, int *err)
312 {
313 int j;
314
315 if (i == 0) {
316 /* SVM type */
317 for (j=0; svm_type_names[j] != NULL; j++) {
318 if (!g_ascii_strcasecmp(s, svm_type_names[j])) {
319 return j;
320 }
321 }
322 pprintf(prn, "%s: unrecognized SVM type\n", s);
323 *err = E_INVARG;
324 } else {
325 /* kernel type */
326 for (j=0; kernel_type_names[j] != NULL; j++) {
327 if (!g_ascii_strcasecmp(s, kernel_type_names[j])) {
328 return j;
329 }
330 }
331 pprintf(prn, "%s: unrecognized kernel type\n", s);
332 *err = E_INVARG;
333 }
334
335 return -1;
336 }
337
set_or_store_sv_parm(sv_parm * parm,gretl_bundle * b,int store,PRN * prn)338 static int set_or_store_sv_parm (sv_parm *parm, gretl_bundle *b,
339 int store, PRN *prn)
340 {
341 void *elem[N_PARMS] = {
342 &parm->svm_type,
343 &parm->kernel_type,
344 &parm->degree,
345 &parm->gamma,
346 &parm->coef0,
347 &parm->cache_size,
348 &parm->eps,
349 &parm->C,
350 &parm->nr_weight,
351 &parm->weight_label,
352 &parm->weight,
353 &parm->nu,
354 &parm->p,
355 &parm->shrinking,
356 &parm->probability
357 };
358 int got_kspec = 0;
359 int i, ival;
360 double xval;
361 int err = 0;
362
363 if (store) {
364 /* we're supposed to store @parm in @b */
365 for (i=0; i<N_PARMS && !err; i++) {
366 if (pinfo[i].type == GRETL_TYPE_DOUBLE) {
367 xval = *(double *) elem[i];
368 gretl_bundle_set_scalar(b, pinfo[i].key, xval);
369 } else {
370 ival = *(int *) elem[i];
371 gretl_bundle_set_int(b, pinfo[i].key, ival);
372 }
373 }
374 /* done */
375 return 0;
376 }
377
378 set_sv_parm_defaults(parm);
379
380 for (i=0; i<N_PARMS && !err; i++) {
381 if (gretl_bundle_has_key(b, pinfo[i].key)) {
382 if (i < 2) {
383 /* types of svm and kernel: int or string */
384 GretlType type = 0;
385 void *ptr = gretl_bundle_get_data(b, pinfo[i].key,
386 &type, NULL, &err);
387 if (type == GRETL_TYPE_INT) {
388 *(int *) elem[i] = *(int *) ptr;
389 } else if (type == GRETL_TYPE_DOUBLE) {
390 *(int *) elem[i] = *(double *) ptr;
391 } else if (type == GRETL_TYPE_STRING) {
392 *(int *) elem[i] = type_value_from_string(ptr, i, prn, &err);
393 } else {
394 fprintf(stderr, "parameter %d, bad option type\n", i);
395 err = E_TYPES;
396 }
397 if (i == 1) {
398 got_kspec = 1;
399 }
400 } else if (i >= 8 && i <= 10) {
401 pputs(prn, "Sorry, svm weighting not handled yet\n");
402 err = E_INVARG;
403 } else if (pinfo[i].type == GRETL_TYPE_DOUBLE) {
404 xval = gretl_bundle_get_scalar(b, pinfo[i].key, &err);
405 if (!err) {
406 *(double *) elem[i] = xval;
407 }
408 } else if (pinfo[i].type == GRETL_TYPE_INT ||
409 pinfo[i].type == GRETL_TYPE_BOOL) {
410 ival = gretl_bundle_get_int(b, pinfo[i].key, &err);
411 if (!err) {
412 if (pinfo[i].type == GRETL_TYPE_BOOL) {
413 *(int *) elem[i] = (ival != 0);
414 } else {
415 *(int *) elem[i] = ival;
416 }
417 }
418 }
419 }
420 }
421
422 if (!err && !got_kspec) {
423 /* the user didn't specify a kernel type: we default to
424 Gaussian RBF above, but PERC seems to work better for
425 ranking
426 */
427 if (parm->svm_type == C_RNK) {
428 parm->kernel_type = PERC;
429 }
430 }
431
432 return err;
433 }
434
set_svm_parm(sv_parm * parm,gretl_bundle * b,PRN * prn)435 static int set_svm_parm (sv_parm *parm, gretl_bundle *b,
436 PRN *prn)
437 {
438 return set_or_store_sv_parm(parm, b, 0, prn);
439 }
440
441 /* printing apparatus */
442
443 static PRN *svm_prn;
444
gretl_libsvm_print(const char * s)445 static void gretl_libsvm_print (const char *s)
446 {
447 if (svm_prn != NULL) {
448 pputs(svm_prn, s);
449 gretl_flush(svm_prn);
450 } else {
451 fputs(s, stdout);
452 fflush(stdout);
453 }
454 }
455
gretl_libsvm_noprint(const char * s)456 static void gretl_libsvm_noprint (const char *s)
457 {
458 return;
459 }
460
461 /* end printing apparatus */
462
gretl_destroy_svm_model(sv_model * model)463 static void gretl_destroy_svm_model (sv_model *model)
464 {
465 if (model != NULL) {
466 if (model->l > 0 && model->SV != NULL &&
467 model->SV[0] != NULL) {
468 free(model->SV[0]);
469 }
470 if (model->sv_coef != NULL) {
471 doubles_array_free(model->sv_coef, model->nr_class-1);
472 }
473 free(model->SV);
474 free(model->rho);
475 free(model->label);
476 free(model->probA);
477 free(model->probB);
478 free(model->sv_indices);
479 free(model->nSV);
480 free(model);
481 }
482 }
483
bundle_as_matrix(gretl_bundle * b,const char * key,double * xvals,int n)484 static int bundle_as_matrix (gretl_bundle *b, const char *key,
485 double *xvals, int n)
486 {
487 gretl_matrix *m = gretl_matrix_alloc(n, 1);
488
489 if (m == NULL) {
490 return E_ALLOC;
491 } else {
492 memcpy(m->val, xvals, n * sizeof *xvals);
493 gretl_bundle_donate_data(b, key, m, GRETL_TYPE_MATRIX, 0);
494 return 0;
495 }
496 }
497
bundle_as_list(gretl_bundle * b,const char * key,int * ivals,int n)498 static int bundle_as_list (gretl_bundle *b, const char *key,
499 int *ivals, int n)
500 {
501 int *list = gretl_list_new(n);
502
503 if (list == NULL) {
504 return E_ALLOC;
505 } else {
506 memcpy(list + 1, ivals, n * sizeof *ivals);
507 gretl_bundle_donate_data(b, key, list, GRETL_TYPE_LIST, 0);
508 return 0;
509 }
510 }
511
svm_model_save_to_bundle(const sv_model * model,gretl_bundle * b)512 static int svm_model_save_to_bundle (const sv_model *model,
513 gretl_bundle *b)
514 {
515 const sv_parm *parm = &model->param;
516 gretl_matrix *m = NULL;
517 int i, j, nc, l, ntr;
518 int err = 0;
519
520 gretl_bundle_void_content(b);
521
522 gretl_bundle_set_int(b, "svm_type", parm->svm_type);
523 gretl_bundle_set_int(b, "kernel_type", parm->kernel_type);
524
525 if (parm->kernel_type == POLY) {
526 gretl_bundle_set_int(b, "degree", parm->degree);
527 }
528
529 if (uses_gamma(parm)) {
530 gretl_bundle_set_scalar(b, "gamma", parm->gamma);
531 }
532
533 if (uses_coef0(parm)) {
534 gretl_bundle_set_scalar(b, "coef0", parm->coef0);
535 }
536
537 nc = model->nr_class;
538 l = model->l;
539
540 gretl_bundle_set_int(b, "nr_class", nc);
541 gretl_bundle_set_int(b, "l", l);
542
543 /* number of triangular elements */
544 ntr = nc * (nc - 1) / 2;
545
546 bundle_as_matrix(b, "rho", model->rho, ntr);
547
548 if (model->label != NULL) {
549 bundle_as_list(b, "label", model->label, nc);
550 }
551 if (model->probA != NULL) {
552 bundle_as_matrix(b, "probA", model->probA, ntr);
553 }
554 if (model->probB != NULL) {
555 bundle_as_matrix(b, "probB", model->probB, ntr);
556 }
557 if (model->nSV != NULL) {
558 bundle_as_list(b, "nr_sv", model->nSV, nc);
559 }
560
561 /* store the SVs */
562
563 m = gretl_matrix_alloc(l, nc - 1);
564 if (m != NULL) {
565 for (j=0; j<nc-1; j++) {
566 /* j = class index */
567 for (i=0; i<l; i++) {
568 /* i = row index */
569 gretl_matrix_set(m, i, j, model->sv_coef[j][i]);
570 }
571 }
572 gretl_bundle_donate_data(b, "sv_coef", m,
573 GRETL_TYPE_MATRIX, 0);
574 }
575
576 {
577 gretl_array *aidx, *avec = NULL;
578 gretl_matrix *vec;
579 int *idx;
580 int n_elements = 0;
581
582 aidx = gretl_array_new(GRETL_TYPE_LISTS, l, &err);
583 if (!err) {
584 avec = gretl_array_new(GRETL_TYPE_MATRICES, l, &err);
585 }
586
587 for (i=0; i<l && !err; i++) {
588 const sv_cell *p = model->SV[i];
589 int k, ni = 0;
590
591 /* count the nodes on this row */
592 while (p->index != -1) {
593 ni++;
594 p++;
595 }
596 idx = gretl_list_new(ni);
597 vec = gretl_matrix_alloc(1, ni);
598 if (idx == NULL || vec == NULL) {
599 err = E_ALLOC;
600 break;
601 }
602 p = model->SV[i]; /* reset */
603 for (k=0; k<ni; k++) {
604 idx[k+1] = p[k].index;
605 vec->val[k] = p[k].value;
606 }
607 gretl_array_set_list(aidx, i, idx, 0);
608 gretl_array_set_matrix(avec, i, vec, 0);
609 n_elements += ni + 1;
610 }
611
612 if (err) {
613 gretl_array_destroy(aidx);
614 gretl_array_destroy(avec);
615 } else {
616 gretl_bundle_set_int(b, "n_elements", n_elements);
617 gretl_bundle_donate_data(b, "SV_indices", aidx,
618 GRETL_TYPE_ARRAY, 0);
619 gretl_bundle_donate_data(b, "SV_vecs", avec,
620 GRETL_TYPE_ARRAY, 0);
621 }
622 }
623
624 if (err) {
625 gretl_bundle_void_content(b);
626 }
627
628 return err;
629 }
630
save_results_to_bundle(const sv_parm * parm,sv_wrapper * w,gretl_bundle * b)631 static void save_results_to_bundle (const sv_parm *parm,
632 sv_wrapper *w,
633 gretl_bundle *b)
634 {
635 if (w->xdata != NULL) {
636 int ns = w->xdata->cols;
637 char **S = strings_array_new(ns);
638
639 if (S != NULL) {
640 S[0] = gretl_strdup("C");
641 S[1] = gretl_strdup("gamma");
642 if (ns == 4) {
643 const char *s = uses_epsilon(parm) ? "epsilon" : "nu";
644
645 S[2] = gretl_strdup(s);
646 S[3] = gretl_strdup("score");
647 } else {
648 S[2] = gretl_strdup("score");
649 }
650 gretl_matrix_set_colnames(w->xdata, S);
651 }
652 gretl_bundle_donate_data(b, "xvalid_results", w->xdata,
653 GRETL_TYPE_MATRIX, 0);
654 w->xdata = NULL;
655 }
656 }
657
658 /* If we did probability estimation we should have results
659 in w->Ptrain and/or w->Ptest (in the classification
660 case) or a single Laplace scale value in w->svr_sigma
661 (in the regression case). We now stuff this info into
662 the courier bundle supplied by the caller.
663 */
664
save_probs_to_bundle(sv_wrapper * w,gretl_bundle * b)665 static void save_probs_to_bundle (sv_wrapper *w,
666 gretl_bundle *b)
667 {
668 if (w->Ptrain != NULL) {
669 gretl_bundle_donate_data(b, "Ptrain", w->Ptrain,
670 GRETL_TYPE_MATRIX, 0);
671 w->Ptrain = NULL;
672 }
673 if (w->Ptest != NULL) {
674 gretl_bundle_donate_data(b, "Ptest", w->Ptest,
675 GRETL_TYPE_MATRIX, 0);
676 w->Ptest = NULL;
677 }
678 if (!na(w->svr_sigma)) {
679 gretl_bundle_set_scalar(b, "svr_sigma", w->svr_sigma);
680 }
681 }
682
array_from_bundled_matrix(gretl_bundle * b,const char * key,int required,int * err)683 static double *array_from_bundled_matrix (gretl_bundle *b,
684 const char *key,
685 int required,
686 int *err)
687 {
688 double *ret = NULL;
689
690 if (*err) return NULL;
691
692 if (gretl_bundle_has_key(b, key)) {
693 gretl_matrix *m = gretl_bundle_get_matrix(b, key, err);
694
695 if (m != NULL) {
696 int n = m->rows * m->cols;
697
698 ret = malloc(n * sizeof *ret);
699 if (ret == NULL) {
700 *err = E_ALLOC;
701 } else {
702 memcpy(ret, m->val, n * sizeof *ret);
703 }
704 }
705 } else if (required) {
706 gretl_errmsg_sprintf("svm model: required matrix %s was not found", key);
707 *err = E_DATA;
708 }
709
710 return ret;
711 }
712
array_from_bundled_list(gretl_bundle * b,const char * key,int required,int * err)713 static int *array_from_bundled_list (gretl_bundle *b,
714 const char *key,
715 int required,
716 int *err)
717 {
718 int *ret = NULL;
719
720 if (*err) return NULL;
721
722 if (gretl_bundle_has_key(b, key)) {
723 int *list = gretl_bundle_get_list(b, key, err);
724
725 if (list != NULL) {
726 int n = list[0];
727
728 ret = malloc(n * sizeof *ret);
729 if (ret == NULL) {
730 *err = E_ALLOC;
731 } else {
732 memcpy(ret, list + 1, n * sizeof *ret);
733 }
734 }
735 } else if (required) {
736 gretl_errmsg_sprintf("svm model: required list %s was not found", key);
737 *err = E_DATA;
738 }
739
740 return ret;
741 }
742
svm_model_from_bundle(gretl_bundle * b,int * err)743 static sv_model *svm_model_from_bundle (gretl_bundle *b,
744 int *err)
745 {
746 struct svm_parameter *parm;
747 sv_model *model;
748 gretl_matrix *m;
749 int n_elements = 0;
750 int nc = 0, l = 0;
751 int i, j;
752
753 model = malloc(sizeof *model);
754 if (model == NULL) {
755 *err = E_ALLOC;
756 return NULL;
757 }
758
759 memset(model, 0, sizeof *model);
760 parm = &model->param;
761 *err = set_svm_parm(parm, b, NULL);
762
763 if (!*err) {
764 nc = model->nr_class = gretl_bundle_get_int(b, "nr_class", err);
765 l = model->l = gretl_bundle_get_int(b, "l", err);
766 n_elements = gretl_bundle_get_int(b, "n_elements", err);
767 if (nc <= 0 || l <= 0 || n_elements <= 0) {
768 *err = E_DATA;
769 }
770 }
771
772 model->rho = array_from_bundled_matrix(b, "rho", 1, err);
773
774 /* not always present */
775 model->label = array_from_bundled_list(b, "label", 0, err);
776 model->probA = array_from_bundled_matrix(b, "probA", 0, err);
777 model->probB = array_from_bundled_matrix(b, "probB", 0, err);
778 model->nSV = array_from_bundled_list(b, "nr_sv", 0, err);
779
780 /* load the SVs */
781
782 if (!*err) {
783 m = gretl_bundle_get_matrix(b, "sv_coef", err);
784 if (m == NULL) {
785 *err = E_DATA;
786 } else {
787 model->sv_coef = doubles_array_new(nc-1, l);
788 if (model->sv_coef == NULL) {
789 *err = E_ALLOC;
790 } else {
791 double *val = m->val;
792
793 for (j=0; j<nc-1; j++) {
794 memcpy(model->sv_coef[j], val, l * sizeof *val);
795 val += l;
796 }
797 }
798 }
799 }
800
801 if (!*err) {
802 sv_cell *p = NULL, *x_space = NULL;
803 gretl_array *aidx = NULL;
804 gretl_array *avec = NULL;
805 gretl_matrix *vec;
806 int *idx;
807
808 model->SV = malloc(l * sizeof *model->SV);
809 if (model->SV == NULL) {
810 *err = E_ALLOC;
811 } else {
812 x_space = malloc(n_elements * sizeof *x_space);
813 if (x_space == NULL) {
814 *err = E_ALLOC;
815 } else {
816 model->SV[0] = p = x_space;
817 }
818 }
819
820 if (!*err) {
821 aidx = gretl_bundle_get_array(b, "SV_indices", NULL);
822 avec = gretl_bundle_get_array(b, "SV_vecs", NULL);
823 if (gretl_array_get_type(aidx) != GRETL_TYPE_LISTS ||
824 gretl_array_get_type(avec) != GRETL_TYPE_MATRICES) {
825 *err = E_DATA;
826 }
827 }
828
829 for (i=0; i<l && !*err; i++) {
830 int ni;
831
832 model->SV[i] = p;
833 idx = gretl_array_get_element(aidx, i, NULL, err);
834 vec = gretl_array_get_element(avec, i, NULL, err);
835 ni = idx[0];
836 for (j=0; j<ni; j++) {
837 p[j].index = idx[j+1];
838 p[j].value = vec->val[j];
839 }
840 /* add -1 sentinel */
841 p[j].index = -1;
842 p += ni + 1;
843 }
844 }
845
846 if (*err) {
847 gretl_destroy_svm_model(model);
848 model = NULL;
849 }
850
851 return model;
852 }
853
854 /* can use for testing against svm-scale */
855
write_ranges(sv_wrapper * w)856 static int write_ranges (sv_wrapper *w)
857 {
858 const char *fname;
859 int libsvm_format = 0;
860 double lo, hi;
861 int i, idx, vi;
862 FILE *fp;
863
864 fname = gretl_maybe_switch_dir(w->ranges_outfile);
865 fp = gretl_fopen(fname, "wb");
866 if (fp == NULL) {
867 return E_FOPEN;
868 }
869
870 if (w->flags & W_STDFMT) {
871 libsvm_format = 1;
872 }
873
874 gretl_push_c_numeric_locale();
875
876 if (libsvm_format) {
877 fprintf(fp, "x\n%d %d\n",
878 (int) gretl_matrix_get(w->ranges, 0, 0),
879 (int) gretl_matrix_get(w->ranges, 0, 1));
880 } else {
881 fprintf(fp, "x\n%d %d %d\n",
882 (int) gretl_matrix_get(w->ranges, 0, 0),
883 (int) gretl_matrix_get(w->ranges, 0, 1),
884 (int) gretl_matrix_get(w->ranges, 0, 2));
885 }
886
887 for (i=1; i<w->ranges->rows; i++) {
888 idx = gretl_matrix_get(w->ranges, i, 0);
889 lo = gretl_matrix_get(w->ranges, i, 1);
890 hi = gretl_matrix_get(w->ranges, i, 2);
891 if (libsvm_format) {
892 fprintf(fp, "%d %.16g %.16g\n", idx, lo, hi);
893 } else {
894 vi = gretl_matrix_get(w->ranges, i, 3);
895 fprintf(fp, "%d %.16g %.16g %d\n", idx, lo, hi, vi);
896 }
897 }
898
899 gretl_pop_c_numeric_locale();
900
901 fclose(fp);
902
903 return 0;
904 }
905
read_ranges(sv_wrapper * w)906 static int read_ranges (sv_wrapper *w)
907 {
908 const char *fname;
909 FILE *fp;
910 char line[512];
911 double lo, hi, j;
912 int read_lims = 0;
913 int ncols = 4;
914 int i, vi, idx, n = 0;
915 int err = 0;
916
917 fname = gretl_maybe_switch_dir(w->ranges_infile);
918 fp = gretl_fopen(fname, "rb");
919 if (fp == NULL) {
920 return E_FOPEN;
921 }
922
923 gretl_push_c_numeric_locale();
924
925 while (fgets(line, sizeof line, fp) && !err) {
926 if (*line == 'x') {
927 read_lims = 1;
928 continue;
929 }
930 if (read_lims) {
931 if (ncols == 3) {
932 n = sscanf(line, "%lf %lf\n", &lo, &hi);
933 } else {
934 n = sscanf(line, "%lf %lf %lf\n", &lo, &hi, &j);
935 }
936 if (n != ncols - 1) {
937 err = E_DATA;
938 }
939 read_lims = 0;
940 } else if (!string_is_blank(line)) {
941 n++;
942 }
943 }
944
945 w->ranges = gretl_matrix_alloc(n+1, ncols);
946 if (w->ranges == NULL) {
947 err = E_ALLOC;
948 } else {
949 gretl_matrix_set(w->ranges, 0, 0, lo);
950 gretl_matrix_set(w->ranges, 0, 1, hi);
951 gretl_matrix_set(w->ranges, 0, 2, j);
952 if (ncols == 4) {
953 gretl_matrix_set(w->ranges, 0, 3, 0);
954 }
955 rewind(fp);
956 i = 1;
957 }
958
959 while (fgets(line, sizeof line, fp) && !err) {
960 if (*line == 'x') {
961 if (fgets(line, sizeof line, fp) == NULL) {
962 err = E_DATA;
963 break;
964 } else {
965 continue;
966 }
967 }
968 if (ncols == 3) {
969 n = sscanf(line, "%d %lf %lf\n", &idx, &lo, &hi);
970 } else {
971 n = sscanf(line, "%d %lf %lf %d\n", &idx, &lo, &hi, &vi);
972 }
973 if (n != ncols) {
974 err = E_DATA;
975 } else {
976 gretl_matrix_set(w->ranges, i, 0, idx);
977 gretl_matrix_set(w->ranges, i, 1, lo);
978 gretl_matrix_set(w->ranges, i, 2, hi);
979 if (ncols == 4) {
980 gretl_matrix_set(w->ranges, i, 3, vi);
981 }
982 i++;
983 }
984 }
985
986 gretl_pop_c_numeric_locale();
987
988 fclose(fp);
989
990 return err;
991 }
992
993 /* can use for testing against svm-scale */
994
write_problem(sv_data * p,sv_wrapper * w)995 static int write_problem (sv_data *p, sv_wrapper *w)
996 {
997 const char *fname;
998 FILE *fp;
999 int i, t, idx;
1000 double val;
1001
1002 fname = gretl_maybe_switch_dir(w->data_outfile);
1003 fp = gretl_fopen(fname, "wb");
1004 if (fp == NULL) {
1005 return E_FOPEN;
1006 }
1007
1008 gretl_push_c_numeric_locale();
1009
1010 for (t=0; t<p->l; t++) {
1011 fprintf(fp, "%g ", p->y[t]);
1012 for (i=0; i<w->k; i++) {
1013 idx = p->x[t][i].index;
1014 val = p->x[t][i].value;
1015 if (val != 0) {
1016 fprintf(fp, "%d:%g ", idx, val);
1017 }
1018 }
1019 fputc('\n', fp);
1020 }
1021
1022 gretl_pop_c_numeric_locale();
1023
1024 fclose(fp);
1025
1026 return 0;
1027 }
1028
gretl_sv_data_destroy(sv_data * p,sv_cell * x_space)1029 static void gretl_sv_data_destroy (sv_data *p, sv_cell *x_space)
1030 {
1031 if (p != NULL) {
1032 free(p->y);
1033 free(p->x);
1034 free(p);
1035 }
1036 if (x_space != NULL) {
1037 free(x_space);
1038 }
1039 }
1040
gretl_sv_data_alloc(int T,int k,sv_cell ** px_space,int * err)1041 static sv_data *gretl_sv_data_alloc (int T, int k,
1042 sv_cell **px_space,
1043 int *err)
1044 {
1045 sv_data *p = malloc(sizeof *p);
1046
1047 if (p != NULL) {
1048 p->l = T;
1049 p->y = malloc(T * sizeof *p->y);
1050 p->x = malloc(T * sizeof *p->x);
1051 if (p->y == NULL || p->x == NULL) {
1052 *err = E_ALLOC;
1053 } else {
1054 /* we need an extra cell on each row to hold a
1055 sentinel index value of -1
1056 */
1057 *px_space = malloc(T * (k+1) * sizeof(sv_cell));
1058 if (*px_space == NULL) {
1059 *err = E_ALLOC;
1060 }
1061 }
1062 if (*err) {
1063 gretl_sv_data_destroy(p, NULL);
1064 p = NULL;
1065 }
1066 } else {
1067 *err = E_ALLOC;
1068 }
1069
1070 return p;
1071 }
1072
1073 /* initial discovery of data ranges using the training data */
1074
get_data_ranges(const int * list,const DATASET * dset,sv_wrapper * w)1075 static int get_data_ranges (const int *list,
1076 const DATASET *dset,
1077 sv_wrapper *w)
1078 {
1079 const double *x;
1080 double xmin, xmax;
1081 int lmax = list[0];
1082 int i, j, vi;
1083 int err = 0;
1084
1085 if (w->flags & W_FOLDVAR) {
1086 /* the last series is not a regressor */
1087 lmax--;
1088 }
1089
1090 w->ranges = gretl_matrix_alloc(lmax, 4);
1091 if (w->ranges == NULL) {
1092 return E_ALLOC;
1093 }
1094
1095 /* scaling limits */
1096 xmin = w->scaling == 2 ? 0 : -1;
1097 gretl_matrix_set(w->ranges, 0, 0, xmin); /* lower */
1098 gretl_matrix_set(w->ranges, 0, 1, 1); /* upper */
1099
1100 /* padding */
1101 gretl_matrix_set(w->ranges, 0, 2, 0);
1102 gretl_matrix_set(w->ranges, 0, 3, 0);
1103
1104 j = 0;
1105 for (i=2; i<=lmax; i++) {
1106 vi = list[i];
1107 x = dset->Z[vi];
1108 gretl_minmax(dset->t1, dset->t2, x, &xmin, &xmax);
1109 if (xmin != xmax) {
1110 j++;
1111 gretl_matrix_set(w->ranges, j, 0, j);
1112 gretl_matrix_set(w->ranges, j, 1, xmin);
1113 gretl_matrix_set(w->ranges, j, 2, xmax);
1114 gretl_matrix_set(w->ranges, j, 3, vi);
1115 } else {
1116 fprintf(stderr, "training data: dropping var %d (%s)\n",
1117 vi, dset->varname[vi]);
1118 }
1119 }
1120
1121 if (w->flags & W_YSCALE) {
1122 /* we'll arrange to scale y onto [-1, +1] */
1123 gretl_minmax(dset->t1, dset->t2, dset->Z[list[1]],
1124 &w->ymin, &w->ymax);
1125 }
1126
1127 /* record number of rows actually occupied, which
1128 could be less than the number allocated
1129 */
1130 gretl_matrix_set(w->ranges, 0, 2, j + 1);
1131
1132 return err;
1133 }
1134
1135 /* The following requires a gretl-special "ranges" matrix
1136 with 4 columns, including the dataset IDs of the series.
1137 */
1138
check_test_data(const DATASET * dset,gretl_matrix * ranges,int k)1139 static int check_test_data (const DATASET *dset,
1140 gretl_matrix *ranges,
1141 int k)
1142 {
1143 double xmin, xmax;
1144 int i, n, vi;
1145 int err = 0;
1146
1147 n = 0;
1148 for (i=1; i<=k; i++) {
1149 vi = gretl_matrix_get(ranges, i, 3);
1150 gretl_minmax(dset->t1, dset->t2, dset->Z[vi], &xmin, &xmax);
1151 if (xmin != xmax) {
1152 n++;
1153 } else {
1154 fprintf(stderr, "test data: dropping var %d (%s)\n",
1155 vi, dset->varname[vi]);
1156 /* arrange to exclude this variable by setting the
1157 record of its series ID to zero */
1158 gretl_matrix_set(ranges, i, 3, 0);
1159 }
1160 }
1161
1162 if (n != k) {
1163 fprintf(stderr, "test data: number of usable variables (%d) "
1164 "differs from training data (%d)\n", n, k);
1165 } else {
1166 fprintf(stderr, "test data: number of usable variables "
1167 "agrees with training data\n");
1168 }
1169
1170 return err;
1171 }
1172
1173 /* apply scaling as per the svm-scale binary */
1174
scale_x(double val,double lo,double hi,double scalemin,double scalemax)1175 static double scale_x (double val, double lo, double hi,
1176 double scalemin, double scalemax)
1177 {
1178 if (val == lo) {
1179 val = scalemin;
1180 } else if (val == hi) {
1181 val = scalemax;
1182 } else {
1183 val = scalemin + (scalemax - scalemin) *
1184 (val - lo) / (hi - lo);
1185 }
1186
1187 return val;
1188 }
1189
scale_y(double y,sv_wrapper * w)1190 static double scale_y (double y, sv_wrapper *w)
1191 {
1192 return -1 + 2 * (y - w->ymin) / (w->ymax - w->ymin);
1193 }
1194
unscale_y(double y,sv_wrapper * w)1195 static double unscale_y (double y, sv_wrapper *w)
1196 {
1197 return w->ymin + (w->ymax - w->ymin) * (y + 1) / 2.0;
1198 }
1199
sv_data_fill(sv_data * prob,sv_cell * x_space,sv_wrapper * w,const int * list,const DATASET * dset,int pass)1200 static int sv_data_fill (sv_data *prob,
1201 sv_cell *x_space,
1202 sv_wrapper *w,
1203 const int *list,
1204 const DATASET *dset,
1205 int pass)
1206 {
1207 double scalemin, scalemax;
1208 double yt, xit, xmin, xmax;
1209 int i, j, s, t, idx;
1210 int vf = 0, pos = 0;
1211 int vi = list[1];
1212 int all_ints = 0;
1213 int k = w->k;
1214
1215 #if DATA_DEBUG
1216 fprintf(stderr, "sv_data_fill, pass %d\n", pass);
1217 #endif
1218
1219 /* deal with the LHS variable */
1220 if (pass == 1) {
1221 if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi]) ||
1222 series_is_coded(dset, vi)) {
1223 /* classification, not regression */
1224 w->auto_type = C_SVC;
1225 }
1226 all_ints = 1;
1227 }
1228
1229 for (i=0, t=dset->t1; t<=dset->t2; t++, i++) {
1230 yt = dset->Z[vi][t];
1231 if (w->flags & W_YSCALE) {
1232 prob->y[i] = scale_y(yt, w);
1233 } else {
1234 prob->y[i] = yt;
1235 }
1236 if (all_ints && prob->y[i] != floor(prob->y[i])) {
1237 all_ints = 0;
1238 }
1239 }
1240
1241 if (pass == 1 && (w->flags & W_FOLDVAR)) {
1242 w->flist = gretl_list_new(prob->l);
1243 if (w->flist != NULL) {
1244 /* record the ID of the folds series */
1245 vf = list[list[0]];
1246 }
1247 }
1248
1249 if (pass == 1 && all_ints) {
1250 w->flags |= W_INTDEP;
1251 }
1252
1253 /* retrieve the global x-scaling limits */
1254 scalemin = gretl_matrix_get(w->ranges, 0, 0);
1255 scalemax = gretl_matrix_get(w->ranges, 0, 1);
1256
1257 #if DATA_DEBUG
1258 fprintf(stderr, "scalemin = %g, scalemax = %g\n", scalemin, scalemax);
1259 #endif
1260
1261 /* write the scaled x-data into the problem struct */
1262 for (s=0, t=dset->t1; t<=dset->t2; t++, s++) {
1263 if (vf > 0) {
1264 /* record the specified fold for this obs */
1265 w->flist[s+1] = dset->Z[vf][t];
1266 }
1267 prob->x[s] = &x_space[pos];
1268 j = 0;
1269 for (i=1; i<=k; i++) {
1270 if (w->ranges->cols == 4) {
1271 vi = (int) gretl_matrix_get(w->ranges, i, 3);
1272 if (vi <= 0) {
1273 /* may happen when we get to the test data */
1274 continue;
1275 }
1276 }
1277 idx = (int) gretl_matrix_get(w->ranges, i, 0);
1278 xmin = gretl_matrix_get(w->ranges, i, 1);
1279 xmax = gretl_matrix_get(w->ranges, i, 2);
1280 xit = dset->Z[vi][t];
1281 if (na(xit)) {
1282 /* catch missing values */
1283 fprintf(stderr, "skipping NA for var %d, obs %d\n", vi, t+1);
1284 xit = 0;
1285 } else if (w->scaling != 0) {
1286 xit = scale_x(xit, xmin, xmax, scalemin, scalemax);
1287 }
1288 if (xit == 0) {
1289 /* fprintf(stderr, "skipping a 0 data value (var %d)\n", vi); */
1290 continue;
1291 }
1292 prob->x[s][j].index = idx;
1293 prob->x[s][j].value = xit;
1294 pos++;
1295 j++;
1296 }
1297 /* end-of-row sentinel */
1298 prob->x[s][j].index = -1;
1299 prob->x[s][j].value = 0;
1300 pos++;
1301 }
1302
1303 return 0;
1304 }
1305
1306 /* apparatus for sorting labels into ascending order */
1307
1308 struct lsort {
1309 int val;
1310 int pos;
1311 };
1312
ls_compare(const void * a,const void * b)1313 static int ls_compare (const void *a, const void *b)
1314 {
1315 const struct lsort *pa = a;
1316 const struct lsort *pb = b;
1317
1318 return (pa->val > pb->val) - (pa->val < pb->val);
1319 }
1320
1321 /* Allocate a matrix (either w->Ptrain or w->Ptest)
1322 into which we'll write the per-outcome probabilities.
1323 Set the sorted labels as column names for this
1324 matrix, and return in location @pls the sorting
1325 info that will enable us to write the probabilities
1326 into the correct corresponding columns.
1327 */
1328
get_probs_matrix(sv_wrapper * w,sv_model * model,struct lsort ** pls,int training)1329 static gretl_matrix *get_probs_matrix (sv_wrapper *w,
1330 sv_model *model,
1331 struct lsort **pls,
1332 int training)
1333 {
1334 gretl_matrix **targ;
1335 int nc = model->nr_class;
1336 int T, pt1 = 0, pt2 = 0;
1337
1338 if (training) {
1339 targ = &w->Ptrain;
1340 pt1 = w->t1;
1341 pt2 = w->t2_train;
1342 } else {
1343 targ = &w->Ptest;
1344 pt1 = w->t2_train + 1;
1345 pt2 = w->t2;
1346 }
1347
1348 T = pt2 - pt1 + 1;
1349 *targ = gretl_matrix_alloc(T, nc);
1350 if (*targ == NULL) {
1351 return NULL;
1352 }
1353
1354 gretl_matrix_set_t1(*targ, pt1);
1355 gretl_matrix_set_t2(*targ, pt2);
1356
1357 if (model->label != NULL) {
1358 char **S = strings_array_new(nc);
1359 struct lsort *ls = malloc(nc * sizeof *ls);
1360
1361 if (S != NULL && ls != NULL) {
1362 char labnum[32];
1363 int i;
1364
1365 for (i=0; i<nc; i++) {
1366 ls[i].val = model->label[i];
1367 ls[i].pos = i;
1368 }
1369 qsort(ls, nc, sizeof *ls, ls_compare);
1370 for (i=0; i<nc; i++) {
1371 sprintf(labnum, "%d", ls[i].val);
1372 S[i] = gretl_strdup(labnum);
1373 }
1374 gretl_matrix_set_colnames(*targ, S);
1375 *pls = ls;
1376 }
1377 }
1378
1379 return *targ;
1380 }
1381
real_svm_predict(double * yhat,sv_data * prob,sv_wrapper * w,sv_model * model,int training,const DATASET * dset,PRN * prn)1382 static int real_svm_predict (double *yhat,
1383 sv_data *prob,
1384 sv_wrapper *w,
1385 sv_model *model,
1386 int training,
1387 const DATASET *dset,
1388 PRN *prn)
1389 {
1390 const char *datastr;
1391 gretl_matrix *P = NULL;
1392 struct lsort *ls = NULL;
1393 int n_correct = 0;
1394 int regression = 0;
1395 int nr_class = 0;
1396 int get_sigma = 0;
1397 double ymean = 0.0;
1398 double TSS = 0.0;
1399 double SSR = 0.0;
1400 double MAD = 0.0;
1401 int misses = 0;
1402 double dev, yhi, yi;
1403 double *pi = NULL;
1404 sv_cell *x;
1405 int i, j, err = 0;
1406
1407 if (model->param.svm_type == EPSILON_SVR ||
1408 model->param.svm_type == NU_SVR) {
1409 regression = 1;
1410 if (w->flags & W_YSCALE) {
1411 for (i=0; i<prob->l; i++) {
1412 ymean += unscale_y(prob->y[i], w);
1413 }
1414 ymean /= prob->l;
1415 } else {
1416 ymean = gretl_mean(0, prob->l - 1, prob->y);
1417 }
1418 }
1419
1420 if (model->param.probability) {
1421 if (model->probA == NULL) {
1422 fprintf(stderr, "probability requested but no probA!\n");
1423 w->do_probs = 0;
1424 } else if (regression) {
1425 get_sigma = training;
1426 } else {
1427 /* classification */
1428 nr_class = svm_get_nr_class(model);
1429 P = get_probs_matrix(w, model, &ls, training);
1430 if (P == NULL) {
1431 err = E_ALLOC;
1432 } else {
1433 pi = malloc(nr_class * sizeof *pi);
1434 w->do_probs = 1;
1435 }
1436 }
1437 }
1438
1439 if (err) {
1440 return err;
1441 }
1442
1443 if (w->do_probs) {
1444 maybe_set_svm_seed(w);
1445 }
1446
1447 pprintf(prn, "Calling prediction function (this may take a while)\n");
1448 gretl_flush(prn);
1449 for (i=0; i<prob->l; i++) {
1450 x = prob->x[i];
1451 if (w->do_probs) {
1452 yhi = svm_predict_probability(model, x, pi);
1453 for (j=0; j<nr_class; j++) {
1454 /* transcribe probability estimates */
1455 if (ls != NULL) {
1456 /* re-order the columns */
1457 gretl_matrix_set(P, i, j, pi[ls[j].pos]);
1458 } else {
1459 gretl_matrix_set(P, i, j, pi[j]);
1460 }
1461 }
1462 } else {
1463 yhi = svm_predict(model, x);
1464 }
1465 yi = prob->y[i];
1466 if (!regression) {
1467 n_correct += (yhi == yi);
1468 }
1469 if (w->flags & W_YSCALE) {
1470 yhi = unscale_y(yhi, w);
1471 yi = unscale_y(yi, w);
1472 }
1473 yhat[dset->t1 + i] = yhi;
1474 if (regression) {
1475 /* deviation from mean */
1476 dev = yi - ymean;
1477 TSS += dev * dev;
1478 /* actual minus predicted */
1479 dev = yi - yhi;
1480 SSR += dev * dev;
1481 if (w->regcrit == REG_ROUND_MISS) {
1482 misses += yi != round(yhi);
1483 } else if (w->regcrit == REG_ROUND_MAD) {
1484 MAD += fabs(yi - round(yhi));
1485 } else {
1486 MAD += fabs(dev);
1487 }
1488 }
1489 }
1490
1491 if (pi != NULL) {
1492 free(pi);
1493 free(ls);
1494 } else if (get_sigma) {
1495 /* retrieve estimate of Laplace scale */
1496 w->svr_sigma = svm_get_svr_probability(model);
1497 }
1498
1499 datastr = training ? "Training data" : "Test data";
1500
1501 if (regression) {
1502 MAD /= prob->l;
1503 if (w->regcrit == REG_ROUND_MISS) {
1504 pprintf(prn, "%s: miss ratio = %g (MSE = %g, R^2 = %g)\n", datastr,
1505 misses / (double) prob->l, SSR / prob->l, 1.0 - SSR / TSS);
1506 } else if (w->regcrit > REG_MSE) {
1507 pprintf(prn, "%s: MAD = %g (MSE = %g, R^2 = %g)\n", datastr,
1508 MAD, SSR / prob->l, 1.0 - SSR / TSS);
1509 } else {
1510 pprintf(prn, "%s: MSE = %g, R^2 = %g (MAD = %g)\n", datastr,
1511 SSR / prob->l, 1.0 - SSR / TSS, MAD);
1512 }
1513 } else {
1514 pprintf(prn, "%s: correct predictions = %d (%.1f percent)\n", datastr,
1515 n_correct, 100 * n_correct / (double) prob->l);
1516 }
1517
1518 return 0;
1519 }
1520
print_xvalid_iter(sv_parm * parm,sv_wrapper * w,double val,const char * label,int iter,PRN * prn)1521 static void print_xvalid_iter (sv_parm *parm,
1522 sv_wrapper *w,
1523 double val,
1524 const char *label,
1525 int iter,
1526 PRN *prn)
1527 {
1528 if (iter >= 0) {
1529 pprintf(prn, "[%d] ", iter + 1);
1530 } else {
1531 pputs(prn, "\nCross validation:\n ");
1532 }
1533 pprintf(prn, "C = %g", parm->C);
1534 if (uses_gamma(parm)) {
1535 pprintf(prn, ", gamma = %g", parm->gamma);
1536 }
1537 if (uses_epsilon(parm)) {
1538 pprintf(prn, ", epsilon = %g", parm->p);
1539 } else if (uses_nu(parm)) {
1540 pprintf(prn, ", nu = %g", parm->nu);
1541 }
1542 pprintf(prn, ": %s = %#.8g\n", label, val);
1543 gretl_flush(prn);
1544 }
1545
get_fold_sizes(const sv_data * data,sv_wrapper * w,const DATASET * dset)1546 static int *get_fold_sizes (const sv_data *data,
1547 sv_wrapper *w,
1548 const DATASET *dset)
1549 {
1550 int *ret = gretl_list_new(w->nfold);
1551 int i, t;
1552
1553 if (w->flags & W_CONSEC) {
1554 /* use equally sized consecutive blocks */
1555 int t2 = w->t2_train > 0 ? w->t2_train : dset->t2;
1556 int n = t2 - dset->t1 + 1;
1557 int ni = n / w->nfold;
1558
1559 for (i=1; i<=w->nfold; i++) {
1560 ret[i] = ni;
1561 }
1562 ret[w->nfold] += n % w->nfold;
1563 } else {
1564 /* use the supplied folds series */
1565 for (i=1; i<=w->nfold; i++) {
1566 ret[i] = 0;
1567 for (t=0; t<data->l; t++) {
1568 if (w->flist[t+1] == i) {
1569 ret[i] += 1;
1570 }
1571 }
1572 }
1573 }
1574
1575 return ret;
1576 }
1577
1578 /* Carry out cross validation in the case where the user has provided
1579 a series to specify the "folds", or has specified a given number of
1580 consecutive blocks, as opposed to the default random subsetting.
1581 */
1582
custom_xvalidate(const sv_data * prob,const sv_parm * parm,const sv_wrapper * w,double * targ)1583 static void custom_xvalidate (const sv_data *prob,
1584 const sv_parm *parm,
1585 const sv_wrapper *w,
1586 double *targ)
1587 {
1588 int i, vi, ni;
1589
1590 for (i=0; i<w->nfold; i++) {
1591 struct svm_problem subprob;
1592 struct svm_model *submodel;
1593 int jmin = 0, jmax = 0;
1594 int j, k, useobs;
1595
1596 vi = i + 1;
1597 ni = w->fsize[vi];
1598 subprob.l = prob->l - ni;
1599 subprob.x = malloc(subprob.l * sizeof *subprob.x);
1600 subprob.y = malloc(subprob.l * sizeof *subprob.y);
1601
1602 if (w->flags & W_CONSEC) {
1603 /* find start and end points for fold */
1604 jmin = i * w->fsize[1];
1605 jmax = jmin + ni;
1606 }
1607
1608 /* set the training subsample, excluding fold i */
1609 k = 0;
1610 for (j=0; j<prob->l; j++) {
1611 if (w->flags & W_CONSEC) {
1612 useobs = j < jmin || j >= jmax;
1613 } else {
1614 useobs = w->flist[j+1] != vi;
1615 }
1616 if (useobs) {
1617 subprob.x[k] = prob->x[j];
1618 subprob.y[k] = prob->y[j];
1619 k++;
1620 }
1621 }
1622
1623 /* train on the given subsample */
1624 submodel = svm_train(&subprob, parm);
1625
1626 /* predict on the complementary subsample (fold i only) */
1627 if (w->flags & W_CONSEC) {
1628 /* we don't have to scan the whole prob->x array */
1629 for (j=jmin; j<jmax; j++) {
1630 targ[j] = svm_predict(submodel, prob->x[j]);
1631 }
1632 } else {
1633 /* the values we want may be interspersed */
1634 for (j=0; j<prob->l; j++) {
1635 if (w->flist[j+1] == vi) {
1636 targ[j] = svm_predict(submodel, prob->x[j]);
1637 }
1638 }
1639 }
1640
1641 svm_free_and_destroy_model(&submodel);
1642 free(subprob.x);
1643 free(subprob.y);
1644 }
1645 }
1646
1647 /* implement a single cross validation pass */
1648
xvalidate_once(sv_data * prob,sv_parm * parm,sv_wrapper * w,double * targ,double * crit,int iter,PRN * prn)1649 static int xvalidate_once (sv_data *prob,
1650 sv_parm *parm,
1651 sv_wrapper *w,
1652 double *targ,
1653 double *crit,
1654 int iter,
1655 PRN *prn)
1656 {
1657 int i, n = prob->l;
1658
1659 if (w->fsize != NULL) {
1660 custom_xvalidate(prob, parm, w, targ);
1661 } else {
1662 maybe_set_svm_seed(w);
1663 svm_cross_validation(prob, parm, w->nfold, targ);
1664 }
1665
1666 if (doing_regression(parm)) {
1667 double yi, yhi, dev, minimand = 0;
1668
1669 for (i=0; i<prob->l; i++) {
1670 yi = prob->y[i];
1671 yhi = targ[i];
1672 if (w->flags & W_YSCALE) {
1673 yi = unscale_y(yi, w);
1674 yhi = unscale_y(yhi, w);
1675 }
1676 dev = yi - yhi;
1677 if (w->regcrit == REG_ROUND_MISS) {
1678 minimand += (yi != round(yhi));
1679 } else if (w->regcrit == REG_ROUND_MAD) {
1680 minimand += fabs(yi - round(yhi));
1681 } else if (w->regcrit == REG_MAD) {
1682 minimand += fabs(dev);
1683 } else {
1684 minimand += dev * dev;
1685 }
1686 }
1687 minimand /= n;
1688 if (prn != NULL) {
1689 const char *s = (w->regcrit == REG_MSE)? "MSE" :
1690 (w->regcrit == REG_ROUND_MISS)? "miss ratio" : "MAD";
1691
1692 print_xvalid_iter(parm, w, minimand, s, iter, prn);
1693 }
1694 *crit = -minimand;
1695 } else {
1696 /* classification */
1697 double pc_correct = 0;
1698 int n_correct = 0;
1699
1700 for (i=0; i<n; i++) {
1701 if (targ[i] == prob->y[i]) {
1702 n_correct++;
1703 }
1704 }
1705 pc_correct = 100.0 * n_correct / (double) n;
1706 if (prn != NULL) {
1707 print_xvalid_iter(parm, w, pc_correct,
1708 "percent correct", iter, prn);
1709 }
1710 *crit = pc_correct;
1711 }
1712
1713 return 0;
1714 }
1715
1716 /* For comparison, from libsvm's grid.py:
1717
1718 -log2c {begin,end,step | "null"} : set the range of c (default -5,15,2)
1719 begin,end,step -- c_range = 2^{begin,...,begin+k*step,...,end}
1720 "null" -- do not grid with c
1721 -log2g {begin,end,step | "null"} : set the range of g (default 3,-15,-2)
1722 begin,end,step -- g_range = 2^{begin,...,begin+k*step,...,end}
1723 "null" -- do not with g
1724 */
1725
grid_set_dimensions(sv_grid * g,const gretl_matrix * m)1726 static int grid_set_dimensions (sv_grid *g, const gretl_matrix *m)
1727 {
1728 int i, linvals = 0;
1729 double x;
1730
1731 if (m != NULL && m->cols == 4) {
1732 linvals = 1;
1733 }
1734
1735 for (i=0; i<=G_p; i++) {
1736 if ((g->row[i].stop < g->row[i].start && g->row[i].step >= 0) ||
1737 (g->row[i].stop > g->row[i].start && g->row[i].step <= 0)) {
1738 return E_INVARG;
1739 }
1740 g->null[i] = g->n[i] = g->linear[i] = 0;
1741 if (g->row[i].start == 0 && g->row[i].stop == 0 && g->row[i].step == 0) {
1742 /* flag clamping of this row */
1743 g->null[i] = g->n[i] = 1;
1744 } else if (g->row[i].stop >= g->row[i].start) {
1745 for (x=g->row[i].start; x<=g->row[i].stop; x+=g->row[i].step) {
1746 g->n[i] += 1;
1747 }
1748 } else {
1749 for (x=g->row[i].start; x>=g->row[i].stop; x+=g->row[i].step) {
1750 g->n[i] += 1;
1751 }
1752 }
1753 if (linvals && i < m->rows && gretl_matrix_get(m, i, 3) != 0) {
1754 g->linear[i] = 1;
1755 }
1756 }
1757
1758 return 0;
1759 }
1760
sv_grid_default(sv_grid * g,sv_parm * parm)1761 static void sv_grid_default (sv_grid *g, sv_parm *parm)
1762 {
1763 int gsearch = parm == NULL || uses_gamma(parm);
1764 int altgrid = 0;
1765
1766 if (altgrid) {
1767 g->row[G_C].start = -2;
1768 g->row[G_C].stop = 6;
1769 g->row[G_C].step = 1;
1770 } else {
1771 g->row[G_C].start = -5;
1772 g->row[G_C].stop = 8; /* grid.py has 15 (too big?) */
1773 g->row[G_C].step = 2;
1774 }
1775
1776 if (gsearch) {
1777 g->row[G_g].start = 3;
1778 g->row[G_g].stop = -15;
1779 g->row[G_g].step = -2;
1780 } else {
1781 g->row[G_g].start = 0;
1782 g->row[G_g].stop = 0;
1783 g->row[G_g].step = 0;
1784 }
1785
1786 g->row[G_p].start = 0;
1787 g->row[G_p].stop = 0;
1788 g->row[G_p].step = 0;
1789
1790 grid_set_dimensions(g, NULL);
1791 }
1792
print_grid(sv_grid * g,sv_parm * parm,PRN * prn)1793 static void print_grid (sv_grid *g, sv_parm *parm, PRN *prn)
1794 {
1795 const char *labels[] = {
1796 "C",
1797 "gamma",
1798 ""
1799 };
1800 int i, imax = 2;
1801
1802 if (!g->null[G_p]) {
1803 imax = 3;
1804 labels[2] = uses_nu(parm) ? "nu" : "eps";
1805 }
1806
1807 pputs(prn, "parameter search grid (start, stop, step):\n");
1808
1809 for (i=0; i<imax; i++) {
1810 if (!g->null[i]) {
1811 pprintf(prn, " %-8s %g, %g, %g", labels[i],
1812 g->row[i].start, g->row[i].stop,
1813 g->row[i].step);
1814 if (g->n[i] > 1) {
1815 pprintf(prn, " (%d values, %s)\n", g->n[i],
1816 g->linear[i] ? "linear" : "log2-based");
1817 } else {
1818 pputc(prn, '\n');
1819 }
1820 }
1821 }
1822 pputc(prn, '\n');
1823 }
1824
grid_get_C(sv_grid * g,int i)1825 static double grid_get_C (sv_grid *g, int i)
1826 {
1827 double Ci = g->row[G_C].start + i * g->row[G_C].step;
1828
1829 return g->linear[G_C] ? Ci : pow(2.0, Ci);
1830 }
1831
grid_get_g(sv_grid * g,int j)1832 static double grid_get_g (sv_grid *g, int j)
1833 {
1834 double gj = g->row[G_g].start + j * g->row[G_g].step;
1835
1836 return g->linear[G_g] ? gj : pow(2.0, gj);
1837 }
1838
grid_get_p(sv_grid * g,int k)1839 static double grid_get_p (sv_grid *g, int k)
1840 {
1841 double pk = g->row[G_p].start + k * g->row[G_p].step;
1842
1843 return g->linear[G_p] ? pk : pow(2.0, pk);
1844 }
1845
sv_wrapper_add_grid(sv_wrapper * w,sv_parm * parm,const gretl_matrix * m)1846 static int sv_wrapper_add_grid (sv_wrapper *w,
1847 sv_parm *parm,
1848 const gretl_matrix *m)
1849 {
1850 sv_grid *g;
1851 int i, err = 0;
1852
1853 g = calloc(1, sizeof *g);
1854
1855 if (g == NULL) {
1856 err = E_ALLOC;
1857 } else if (m != NULL) {
1858 if (m->rows < 1 || m->cols < 3) {
1859 err = E_INVARG;
1860 } else {
1861 for (i=0; i<m->rows; i++) {
1862 g->row[i].start = gretl_matrix_get(m, i, 0);
1863 g->row[i].stop = gretl_matrix_get(m, i, 1);
1864 g->row[i].step = gretl_matrix_get(m, i, 2);
1865 }
1866 err = grid_set_dimensions(g, m);
1867 }
1868 } else {
1869 sv_grid_default(g, parm);
1870 }
1871
1872 if (err) {
1873 if (g != NULL) {
1874 free(g);
1875 }
1876 } else {
1877 w->grid = g;
1878 }
1879
1880 return err;
1881 }
1882
sv_wrapper_remove_grid(sv_wrapper * w)1883 static void sv_wrapper_remove_grid (sv_wrapper *w)
1884 {
1885 if (w != NULL && w->grid != NULL) {
1886 free(w->grid);
1887 w->grid = NULL;
1888 }
1889 }
1890
write_plot_file(sv_wrapper * w,sv_parm * parm,double cmax)1891 static int write_plot_file (sv_wrapper *w,
1892 sv_parm *parm,
1893 double cmax)
1894 {
1895 gretl_matrix *m = w->xdata;
1896 const char *zlabel = "MSE";
1897 double xprev, x[3], best[3] = {0};
1898 int critcol = m->cols - 1;
1899 int display, iact = 0;
1900 int i, j, err = 0;
1901 FILE *fp;
1902
1903 set_optval_string(GNUPLOT, OPT_U, w->plot);
1904
1905 if (!strcmp(w->plot, "display")) {
1906 display = iact = 1;
1907 }
1908
1909 if (gui_mode) {
1910 fp = open_3d_plot_input_file(&iact);
1911 } else {
1912 fp = open_plot_input_file(PLOT_USER, 0, &err);
1913 }
1914 if (fp == NULL) {
1915 err = E_FOPEN;
1916 return err;
1917 }
1918
1919 if (!doing_regression(parm)) {
1920 /* must be classification */
1921 zlabel = "correct";
1922 }
1923
1924 gretl_push_c_numeric_locale();
1925
1926 if (w->grid->linear[G_C]) {
1927 fputs("set xlabel 'C'\n", fp);
1928 } else {
1929 fputs("set xlabel 'log2(C)'\n", fp);
1930 }
1931 if (w->grid->linear[G_g]) {
1932 fputs("set ylabel 'gamma'\n", fp);
1933 } else {
1934 fputs("set ylabel 'log2(gamma)'\n", fp);
1935 }
1936 fprintf(fp, "set zlabel '%s'\n", zlabel);
1937 fputs("splot '-' using 1:2:3 title '' w pm3d ,\\\n", fp);
1938 fputs(" '-' using 1:2:3 title 'best' w p lt 1 pt 8\n", fp);
1939 xprev = 0;
1940 for (i=0; i<m->rows; i++) {
1941 x[0] = gretl_matrix_get(m, i, 0);
1942 x[1] = gretl_matrix_get(m, i, 1);
1943 x[2] = gretl_matrix_get(m, i, critcol);
1944 if (x[2] == cmax) {
1945 for (j=0; j<3; j++) {
1946 best[j] = x[j];
1947 }
1948 }
1949 if (i > 0 && x[0] != xprev) {
1950 fputc('\n', fp);
1951 }
1952 xprev = x[0];
1953 fprintf(fp, "%g %g %g\n",
1954 w->grid->linear[0] ? x[0] : log2(x[0]),
1955 w->grid->linear[1] ? x[1] : log2(x[1]),
1956 x[2]);
1957 }
1958 fputs("e\n", fp);
1959 fprintf(fp, "%g %g %g\n",
1960 w->grid->linear[0] ? best[0] : log2(best[0]),
1961 w->grid->linear[1] ? best[1] : log2(best[1]),
1962 best[2]);
1963 fputs("e\n", fp);
1964
1965 gretl_pop_c_numeric_locale();
1966
1967 if (gui_mode && display && iact) {
1968 fputs("pause mouse close\n", fp);
1969 err = finalize_3d_plot_input_file(fp);
1970 if (!err && gui_mode) {
1971 manufacture_gui_callback(GP_ASYNC);
1972 }
1973 } else {
1974 err = finalize_plot_input_file(fp);
1975 if (!err && gui_mode) {
1976 /* wanted? */
1977 manufacture_gui_callback(GNUPLOT);
1978 }
1979 }
1980
1981 return err;
1982 }
1983
1984 /* get ready to do parameter search */
1985
do_search_prep(sv_data * data,sv_parm * parm,sv_wrapper * w,const DATASET * dset,PRN * prn)1986 static int do_search_prep (sv_data *data,
1987 sv_parm *parm,
1988 sv_wrapper *w,
1989 const DATASET *dset,
1990 PRN *prn)
1991 {
1992 int err = 0;
1993
1994 if (0 && parm->kernel_type != RBF) {
1995 pputs(prn, "Non-RBF kernel, don't know how to tune parameters\n");
1996 sv_wrapper_remove_grid(w);
1997 err = E_INVARG;
1998 } else if (w->grid == NULL) {
1999 sv_wrapper_add_grid(w, parm, NULL);
2000 }
2001
2002 if (!err && (w->flags & (W_FOLDVAR | W_CONSEC))) {
2003 w->fsize = get_fold_sizes(data, w, dset);
2004 }
2005
2006 return err;
2007 }
2008
maybe_hush(sv_wrapper * w)2009 static void maybe_hush (sv_wrapper *w)
2010 {
2011 if (!(w->flags & W_QUIET)) {
2012 /* cut out excessive verbosity */
2013 svm_set_print_string_function(gretl_libsvm_noprint);
2014 }
2015 }
2016
maybe_resume_printing(sv_wrapper * w)2017 static void maybe_resume_printing (sv_wrapper *w)
2018 {
2019 if (!(w->flags & W_QUIET)) {
2020 svm_set_print_string_function(gretl_libsvm_print);
2021 }
2022 }
2023
can_write_plot(sv_wrapper * w)2024 static int can_write_plot (sv_wrapper *w)
2025 {
2026 /* for now we handle only the case of a 2D grid
2027 with C and gamma, but this should be generalized
2028 at some point
2029 */
2030 if (w->xdata == NULL) {
2031 return 0;
2032 } else if (w->grid->null[G_C] || w->grid->null[G_g]) {
2033 return 0;
2034 } else if (!w->grid->null[G_p]) {
2035 return 0;
2036 } else {
2037 return 1;
2038 }
2039 }
2040
param_search_finalize(sv_parm * parm,sv_wrapper * w,double cmax,PRN * prn)2041 static void param_search_finalize (sv_parm *parm,
2042 sv_wrapper *w,
2043 double cmax,
2044 PRN *prn)
2045 {
2046 sv_grid *grid = w->grid;
2047
2048 if (w->plot != NULL && can_write_plot(w)) {
2049 write_plot_file(w, parm, fabs(cmax));
2050 }
2051
2052 pprintf(prn, "*** Criterion optimized at %g: C=%g", fabs(cmax), parm->C);
2053 if (uses_gamma(parm)) {
2054 pprintf(prn, ", gamma=%g", parm->gamma);
2055 }
2056 if (grid->null[G_p]) {
2057 ; /* neither epsilon nor nu used in x-validation */
2058 } else if (uses_epsilon(parm)) {
2059 pprintf(prn, ", epsilon=%g", parm->p);
2060 } else if (uses_nu(parm)) {
2061 pprintf(prn, ", nu=%g", parm->nu);
2062 }
2063 pputs(prn, " ***\n");
2064 }
2065
2066 #ifdef HAVE_MPI
2067
cross_validate_worker_task(sv_data * data,sv_parm * parm,sv_wrapper * w,double * targ)2068 static int cross_validate_worker_task (sv_data *data,
2069 sv_parm *parm,
2070 sv_wrapper *w,
2071 double *targ)
2072 {
2073 gretl_matrix *task = NULL;
2074 double crit;
2075 int i, j, seq;
2076 int err = 0;
2077
2078 /* get our sub-task matrix */
2079 task = gretl_matrix_mpi_receive(0, &err);
2080
2081 /* do the actual cross validation */
2082 for (i=0; i<task->rows && !err; i++) {
2083 j = 0;
2084 parm->C = gretl_matrix_get(task, i, j++);
2085 parm->gamma = gretl_matrix_get(task, i, j++);
2086 if (parm->svm_type == EPSILON_SVR) {
2087 parm->p = gretl_matrix_get(task, i, j++);
2088 } else if (parm->svm_type == NU_SVR) {
2089 parm->nu = gretl_matrix_get(task, i, j++);
2090 }
2091 seq = gretl_matrix_get(task, i, task->cols - 1);
2092 err = xvalidate_once(data, parm, w, targ, &crit, seq, worker_prn);
2093 if (!err) {
2094 gretl_matrix_set(task, i, j, crit);
2095 }
2096 }
2097
2098 /* send results back to root */
2099 if (!err) {
2100 err = gretl_matrix_mpi_send(task, 0);
2101 }
2102
2103 /* local clean up */
2104 gretl_matrix_free(task);
2105
2106 return err;
2107 }
2108
allocate_submatrices(int n,int cols,int r1,int nr1)2109 static gretl_matrix **allocate_submatrices (int n,
2110 int cols,
2111 int r1,
2112 int nr1)
2113 {
2114 gretl_matrix **M = malloc(n * sizeof *M);
2115 int i, err = 0;
2116
2117 if (M == NULL) {
2118 err = E_ALLOC;
2119 } else {
2120 for (i=0; i<n && !err; i++) {
2121 if (i < nr1) {
2122 M[i] = gretl_matrix_alloc(r1, cols);
2123 } else {
2124 M[i] = gretl_matrix_alloc(r1 + 1, cols);
2125 }
2126 if (M[i] == NULL) {
2127 err = E_ALLOC;
2128 }
2129 }
2130 }
2131
2132 if (err && M != NULL) {
2133 free(M);
2134 M = NULL;
2135 }
2136
2137 return M;
2138 }
2139
process_results(sv_parm * parm,sv_wrapper * w,gretl_matrix * m,double * pcmax)2140 static int process_results (sv_parm *parm,
2141 sv_wrapper *w,
2142 gretl_matrix *m,
2143 double *pcmax)
2144 {
2145 double crit, cmax = -DBL_MAX;
2146 int i, imax = 0;
2147
2148 for (i=0; i<m->rows; i++) {
2149 crit = gretl_matrix_get(m, i, m->cols - 1);
2150 if (crit > cmax) {
2151 cmax = crit;
2152 imax = i;
2153 }
2154 gretl_matrix_set(m, i, m->cols - 1, fabs(crit));
2155 }
2156
2157 parm->C = gretl_matrix_get(m, imax, 0);
2158 parm->gamma = gretl_matrix_get(m, imax, 1);
2159 if (!w->grid->null[G_p]) {
2160 if (parm->svm_type == EPSILON_SVR) {
2161 parm->p = gretl_matrix_get(m, imax, 2);
2162 } else {
2163 parm->nu = gretl_matrix_get(m, imax, 2);
2164 }
2165 }
2166
2167 /* wrapper takes ownership of @m */
2168 w->xdata = m;
2169
2170 *pcmax = cmax;
2171
2172 return 0;
2173 }
2174
2175 /* The following is called only by the root process,
2176 when doing cross validation via MPI */
2177
carve_up_xvalidation(sv_data * data,sv_parm * parm,sv_wrapper * w,double * targ,double * pcmax,PRN * prn)2178 static int carve_up_xvalidation (sv_data *data,
2179 sv_parm *parm,
2180 sv_wrapper *w,
2181 double *targ,
2182 double *pcmax,
2183 PRN *prn)
2184 {
2185 sv_grid *grid = w->grid;
2186 gretl_matrix **M = NULL;
2187 gretl_matrix *m = NULL;
2188 int *row = NULL;
2189 int nC = grid->n[G_C];
2190 int ng = grid->n[G_g];
2191 int np = grid->n[G_p];
2192 double C, g, p, crit;
2193 double *p3 = NULL;
2194 int ncom, nproc = w->nproc;
2195 int i, j, k, ii;
2196 int r1, nr1, seq;
2197 int rem, ncols = 4;
2198 int err = 0;
2199
2200 if (uses_epsilon(parm)) {
2201 p = parm->p;
2202 ncols++;
2203 if (!grid->null[G_p]) {
2204 p3 = &parm->p;
2205 }
2206 } else if (uses_nu(parm)) {
2207 p = parm->nu;
2208 ncols++;
2209 if (!grid->null[G_p]) {
2210 p3 = &parm->nu;
2211 }
2212 } else {
2213 p = 0;
2214 }
2215
2216 ncom = nC * ng * np; /* number of param combinations */
2217 r1 = ncom / nproc; /* initial rows per matrix */
2218 rem = ncom % nproc; /* remainder, if any */
2219 nr1 = nproc - rem; /* number of matrices with r1 rows */
2220
2221 if (prn != NULL) {
2222 pprintf(prn, "MPI: dividing %d parameter specifications "
2223 "between %d processes\n", ncom, nproc);
2224 }
2225
2226 /* matrices to store per-worker parameter sets */
2227 M = allocate_submatrices(nproc, ncols, r1, nr1);
2228
2229 if (M == NULL) {
2230 err = E_ALLOC;
2231 } else {
2232 /* array to keep track of which row we're on for
2233 each of the submatrices */
2234 row = malloc(nproc * sizeof *row);
2235 if (row == NULL) {
2236 err = E_ALLOC;
2237 } else {
2238 for (i=0; i<nproc; i++) {
2239 row[i] = 0;
2240 }
2241 }
2242 }
2243
2244 if (err) {
2245 goto bailout;
2246 }
2247
2248 m = M[nproc-1]; /* convenience pointer to root's submatrix */
2249 C = parm->C;
2250 g = parm->gamma;
2251
2252 /* We'll dole out the full parameter matrix row by row: this
2253 is an attempt to even up the tasks, given that doing the
2254 cross validation is much more expensive for large values
2255 of C. We add an extra column holding "seq" so that we can
2256 easily put the reassembled matrix into "standard order".
2257 */
2258
2259 ii = seq = 0;
2260 for (i=0; i<nC; i++) {
2261 if (!grid->null[G_C]) {
2262 C = grid_get_C(grid, i);
2263 }
2264 for (j=0; j<ng; j++) {
2265 if (!grid->null[G_g]) {
2266 g = grid_get_g(grid, j);
2267 }
2268 for (k=0; k<np; k++) {
2269 if (!grid->null[G_p]) {
2270 p = grid_get_p(grid, k);
2271 }
2272 if (row[ii] == M[ii]->rows) {
2273 /* skip to the larger matrices */
2274 ii = nr1;
2275 }
2276 gretl_matrix_set(M[ii], row[ii], 0, C);
2277 gretl_matrix_set(M[ii], row[ii], 1, g);
2278 if (ncols == 5) {
2279 gretl_matrix_set(M[ii], row[ii], 2, p);
2280 }
2281 gretl_matrix_set(M[ii], row[ii], ncols - 2, 0);
2282 gretl_matrix_set(M[ii], row[ii], ncols - 1, seq);
2283 seq++;
2284 row[ii] += 1;
2285 if (ii == nproc - 1) {
2286 ii = 0;
2287 } else {
2288 ii++;
2289 }
2290 }
2291 }
2292 }
2293
2294 /* send matrices to workers, keeping the last for root */
2295 for (i=0; i<nproc-1; i++) {
2296 gretl_matrix_mpi_send(M[i], i+1);
2297 gretl_matrix_free(M[i]);
2298 M[i] = NULL;
2299 }
2300
2301 maybe_hush(w);
2302
2303 /* do root's share of the cross validation */
2304 for (i=0; i<m->rows && !err; i++) {
2305 parm->C = gretl_matrix_get(m, i, 0);
2306 parm->gamma = gretl_matrix_get(m, i, 1);
2307 if (!grid->null[G_p]) {
2308 *p3 = gretl_matrix_get(m, i, 2);
2309 }
2310 seq = gretl_matrix_get(m, i, ncols-1);
2311 err = xvalidate_once(data, parm, w, targ, &crit, seq, prn);
2312 if (!err) {
2313 gretl_matrix_set(m, i, ncols-2, crit);
2314 }
2315 }
2316
2317 maybe_resume_printing(w);
2318
2319 /* get results back from workers and process */
2320 if (!err) {
2321 gretl_matrix *Tmp, *Res = NULL;
2322 gretl_matrix *mi;
2323 int row = 0;
2324
2325 Tmp = gretl_matrix_alloc(ncom, 5);
2326 if (Tmp == NULL) {
2327 err = E_ALLOC;
2328 }
2329 for (i=1; i<nproc && !err; i++) {
2330 mi = gretl_matrix_mpi_receive(i, &err);
2331 if (mi != NULL) {
2332 gretl_matrix_inscribe_matrix(Tmp, mi, row, 0, GRETL_MOD_NONE);
2333 row += mi->rows;
2334 gretl_matrix_free(mi);
2335 }
2336 }
2337 if (!err) {
2338 gretl_matrix_inscribe_matrix(Tmp, m, row, 0, GRETL_MOD_NONE);
2339 Res = gretl_matrix_sort_by_column(Tmp, ncols - 1, &err);
2340 }
2341 gretl_matrix_free(Tmp);
2342 if (Res != NULL) {
2343 /* hide the sequence column */
2344 gretl_matrix_reuse(Res, -1, ncols - 1);
2345 err = process_results(parm, w, Res, pcmax);
2346 }
2347 }
2348
2349 bailout:
2350
2351 free(M);
2352 free(row);
2353
2354 return err;
2355 }
2356
call_mpi_cross_validation(sv_data * data,sv_parm * parm,sv_wrapper * w,PRN * prn)2357 static int call_mpi_cross_validation (sv_data *data,
2358 sv_parm *parm,
2359 sv_wrapper *w,
2360 PRN *prn)
2361 {
2362 double cmax = -DBL_MAX;
2363 double *targ;
2364 int err = 0;
2365
2366 targ = malloc(data->l * sizeof *targ);
2367 if (targ == NULL) {
2368 return E_ALLOC;
2369 }
2370
2371 /* arrange to get rand() in sync across the processes */
2372 gretl_mpi_bcast(&w->seed, GRETL_TYPE_UNSIGNED, 0);
2373
2374 if (w->rank == 0) {
2375 if (prn != NULL) {
2376 print_grid(w->grid, parm, prn);
2377 }
2378 err = carve_up_xvalidation(data, parm, w, targ, &cmax, prn);
2379 } else {
2380 err = cross_validate_worker_task(data, parm, w, targ);
2381 }
2382
2383 if (w->rank == 0) {
2384 param_search_finalize(parm, w, cmax, prn);
2385 }
2386
2387 free(targ);
2388
2389 return err;
2390 }
2391
2392 #endif /* HAVE_MPI */
2393
call_cross_validation(sv_data * data,sv_parm * parm,sv_wrapper * w,double * targ,PRN * prn)2394 static int call_cross_validation (sv_data *data,
2395 sv_parm *parm,
2396 sv_wrapper *w,
2397 double *targ,
2398 PRN *prn)
2399 {
2400 double crit;
2401 int err = 0;
2402
2403 pputs(prn, "Cross-validation ");
2404 if (w->flags & W_CONSEC) {
2405 pprintf(prn, "using %d consecutive blocks", w->nfold);
2406 } else if (w->flags & W_FOLDVAR) {
2407 pprintf(prn, "using %d values of %s", w->nfold, w->foldname);
2408 } else {
2409 pprintf(prn, "using %d random folds", w->nfold);
2410 }
2411 pputs(prn, " (may take a while)\n");
2412 gretl_flush(prn);
2413
2414 if (w->grid != NULL) {
2415 sv_grid *grid = w->grid;
2416 double cmax = -DBL_MAX;
2417 double *p3 = NULL;
2418 int nC = grid->n[G_C];
2419 int ng = grid->n[G_g];
2420 int np = grid->n[G_p];
2421 int ibest = 0, jbest = 0, kbest = 0;
2422 int iter = 0;
2423 int ncols = 3;
2424 int i, j, k;
2425
2426 if (prn != NULL) {
2427 print_grid(grid, parm, prn);
2428 }
2429
2430 if (uses_epsilon(parm)) {
2431 ncols++;
2432 p3 = &parm->p;
2433 } else if (uses_nu(parm)) {
2434 ncols++;
2435 p3 = &parm->nu;
2436 }
2437
2438 if ((w->plot != NULL || (w->flags & W_SVPARM)) && (nC > 1 || ng > 1)) {
2439 w->xdata = gretl_matrix_alloc(nC * ng * np, ncols);
2440 }
2441
2442 maybe_hush(w);
2443
2444 for (i=0; i<nC; i++) {
2445 if (!grid->null[G_C]) {
2446 parm->C = grid_get_C(grid, i);
2447 }
2448 for (j=0; j<ng; j++) {
2449 if (!grid->null[G_g]) {
2450 parm->gamma = grid_get_g(grid, j);
2451 }
2452 for (k=0; k<np; k++) {
2453 if (!grid->null[G_p]) {
2454 *p3 = grid_get_p(grid, k);
2455 }
2456 xvalidate_once(data, parm, w, targ, &crit, iter, prn);
2457 if (crit > cmax) {
2458 cmax = crit;
2459 ibest = i;
2460 jbest = j;
2461 kbest = k;
2462 }
2463 if (w->xdata != NULL) {
2464 gretl_matrix_set(w->xdata, iter, 0, parm->C);
2465 gretl_matrix_set(w->xdata, iter, 1, parm->gamma);
2466 if (p3 != NULL) {
2467 gretl_matrix_set(w->xdata, iter, 2, *p3);
2468 }
2469 gretl_matrix_set(w->xdata, iter, ncols - 1, fabs(crit));
2470 }
2471 iter++;
2472 }
2473 }
2474 }
2475
2476 maybe_resume_printing(w);
2477
2478 if (!grid->null[G_C]) {
2479 parm->C = grid_get_C(grid, ibest);
2480 }
2481 if (!grid->null[G_g]) {
2482 parm->gamma = grid_get_g(grid, jbest);
2483 }
2484 if (!grid->null[G_p]) {
2485 *p3 = grid_get_p(grid, kbest);
2486 }
2487 param_search_finalize(parm, w, cmax, prn);
2488 } else {
2489 /* no search: just a single cross validation pass
2490 FIXME save the criterion to optional bundle arg?
2491 */
2492 err = xvalidate_once(data, parm, w, targ, &crit, -1, prn);
2493 }
2494
2495 return err;
2496 }
2497
2498 /* If the caller provided a "folds" series, check it for
2499 validity: the values must be consecutive integers
2500 starting at 1, and the number of folds must be within
2501 bounds.
2502 */
2503
check_folds_series(const int * list,const DATASET * dset,sv_wrapper * w,PRN * prn)2504 static int check_folds_series (const int *list,
2505 const DATASET *dset,
2506 sv_wrapper *w,
2507 PRN *prn)
2508 {
2509 gretl_matrix *fvec;
2510 const double *x;
2511 int v = list[list[0]];
2512 int i, t2, n;
2513 int err = 0;
2514
2515 x = dset->Z[v] + dset->t1;
2516 t2 = w->t2_train > 0 ? w->t2_train : dset->t2;
2517 n = t2 - dset->t1 + 1;
2518
2519 fvec = gretl_matrix_values(x, n, OPT_S, &err);
2520 if (!err) {
2521 n = gretl_vector_get_length(fvec);
2522 x = fvec->val;
2523
2524 for (i=0; i<n && !err; i++) {
2525 if (i > 0) {
2526 if (x[i] != x[i-1] + 1) {
2527 fputs("foldvar must hold consecutive integers\n", stderr);
2528 err = E_DATA;
2529 }
2530 } else if (x[i] != 1.0) {
2531 fputs("foldvar values must start at 1\n", stderr);
2532 err = E_DATA;
2533 }
2534 }
2535
2536 if (!err) {
2537 int nf = x[n-1];
2538
2539 pprintf(prn, "%s: found %d folds\n", dset->varname[v], nf);
2540 if (nf < 2 || (w->nfold > 0 && nf != w->nfold)) {
2541 fprintf(stderr, "invalid number of folds %d\n", nf);
2542 err = E_DATA;
2543 } else {
2544 strcpy(w->foldname, dset->varname[v]);
2545 w->nfold = nf;
2546 }
2547 }
2548 }
2549
2550 gretl_matrix_free(fvec);
2551
2552 return err;
2553 }
2554
2555 /* It's OK if there's no @key value in bundle @b, but if it
2556 is present it should hold an integer value. Return 1
2557 if key found and integer retrieved OK, else 0.
2558 */
2559
get_optional_int(gretl_bundle * b,const char * key,int * ival,int * err)2560 static int get_optional_int (gretl_bundle *b, const char *key,
2561 int *ival, int *err)
2562 {
2563 GretlType type = 0;
2564 void *ptr;
2565
2566 ptr = gretl_bundle_get_data(b, key, &type, NULL, NULL);
2567
2568 if (ptr != NULL) {
2569 if (type == GRETL_TYPE_INT) {
2570 *ival = *(int *) ptr;
2571 return 1;
2572 } else if (type == GRETL_TYPE_DOUBLE) {
2573 *ival = *(double *) ptr;
2574 return 1;
2575 } else if (type == GRETL_TYPE_UNSIGNED) {
2576 *ival = *(unsigned *) ptr;
2577 return 1;
2578 } else if (err != NULL) {
2579 *err = E_TYPES;
2580 }
2581 }
2582
2583 return 0;
2584 }
2585
get_optional_unsigned(gretl_bundle * b,const char * key,unsigned int * uval,int * err)2586 static int get_optional_unsigned (gretl_bundle *b, const char *key,
2587 unsigned int *uval, int *err)
2588 {
2589 GretlType type = 0;
2590 void *ptr;
2591
2592 ptr = gretl_bundle_get_data(b, key, &type, NULL, NULL);
2593
2594 if (ptr != NULL) {
2595 if (type == GRETL_TYPE_INT) {
2596 *uval = *(int *) ptr;
2597 return 1;
2598 } else if (type == GRETL_TYPE_DOUBLE) {
2599 *uval = *(double *) ptr;
2600 return 1;
2601 } else if (type == GRETL_TYPE_UNSIGNED) {
2602 *uval = *(unsigned *) ptr;
2603 return 1;
2604 } else {
2605 *err = E_TYPES;
2606 }
2607 }
2608
2609 return 0;
2610 }
2611
2612 /* Determine if @s is a recognized parameter key: we
2613 do this so we can flag anything that may be a
2614 mistyped key.
2615 */
2616
is_w_parm(const char * s)2617 static int is_w_parm (const char *s)
2618 {
2619 const char *wparms[] = {
2620 "loadmod", "scaling", "predict", "n_train",
2621 "folds", "seed", "quiet", "search",
2622 "foldvar", "consecutive", "yscale",
2623 "search_only", "grid", "ranges_outfile",
2624 "data_outfile", "ranges_infile", "model_outfile",
2625 "model_infile", "plot", "range_format", "refold",
2626 "autoseed", "regcrit", "use_mpi", NULL
2627 };
2628 int i;
2629
2630 for (i=0; wparms[i] != NULL; i++) {
2631 if (!strcmp(s, wparms[i])) {
2632 return 1;
2633 }
2634 }
2635
2636 return 0;
2637 }
2638
check_user_params(gretl_array * A)2639 static int check_user_params (gretl_array *A)
2640 {
2641 char **pstrs;
2642 int i, np, err = 0;
2643
2644 pstrs = gretl_array_get_strings(A, &np);
2645 if (pstrs == NULL) {
2646 return 0; /* ? */
2647 }
2648
2649 for (i=0; i<np; i++) {
2650 if (!is_w_parm(pstrs[i]) && !is_sv_parm(pstrs[i])) {
2651 gretl_errmsg_sprintf("svm: unrecognized parameter '%s'",
2652 pstrs[i]);
2653 err = E_BADOPT;
2654 break;
2655 }
2656 }
2657
2658 return err;
2659 }
2660
2661 /* process @bparm, which contains the parameters provided by
2662 the caller */
2663
read_params_bundle(gretl_bundle * bparm,gretl_bundle * bmod,sv_wrapper * wrap,sv_parm * parm,const int * list,const DATASET * dset,PRN * prn)2664 static int read_params_bundle (gretl_bundle *bparm,
2665 gretl_bundle *bmod,
2666 sv_wrapper *wrap,
2667 sv_parm *parm,
2668 const int *list,
2669 const DATASET *dset,
2670 PRN *prn)
2671 {
2672 gretl_array *A;
2673 int no_savemod = 0;
2674 unsigned int uval;
2675 int ival, err = 0;
2676
2677 /* got any bad keys in @bparm? */
2678 A = gretl_bundle_get_keys(bparm, NULL);
2679 if (A != NULL) {
2680 err = check_user_params(A);
2681 gretl_array_destroy(A);
2682 if (err) {
2683 return err;
2684 }
2685 }
2686
2687 /* start by reading some info that's not included in
2688 the libsvm @parm struct
2689 */
2690
2691 if (get_optional_int(bparm, "loadmod", &ival, &err)) {
2692 if (ival != 0 && bmod == NULL) {
2693 gretl_errmsg_sprintf("svm: invalid 'loadmod' value %d\n", ival);
2694 err = E_INVARG;
2695 } else if (ival != 0) {
2696 wrap->flags |= W_LOADMOD;
2697 no_savemod = 1;
2698 }
2699 }
2700
2701 if (get_optional_int(bparm, "scaling", &ival, &err)) {
2702 if (ival < 0 || ival > 2) {
2703 gretl_errmsg_sprintf("svm: invalid 'scaling' value %d\n", ival);
2704 err = E_INVARG;
2705 } else {
2706 wrap->scaling = ival;
2707 }
2708 }
2709
2710 if (get_optional_int(bparm, "predict", &ival, &err)) {
2711 if (ival < 0 || ival > 2) {
2712 gretl_errmsg_sprintf("svm: invalid 'predict' value %d\n", ival);
2713 err = E_INVARG;
2714 } else {
2715 wrap->predict = ival;
2716 }
2717 }
2718
2719 if (get_optional_int(bparm, "n_train", &ival, &err)) {
2720 /* number of training observations: this sets a range
2721 starting at the first complete observation in the
2722 incoming sample, which was recorded as wrap->t1
2723 after trimming any missing values
2724 */
2725 if (ival == 0) {
2726 wrap->flags |= W_NOTRAIN;
2727 pputs(prn, "n_train = 0, no training will be done\n");
2728 } else {
2729 int nmax = wrap->t2 - wrap->t1 + 1;
2730
2731 if (ival < list[0]) {
2732 gretl_errmsg_sprintf("svm: n_train must be at least %d", list[0]);
2733 err = E_INVARG;
2734 } else if (ival > nmax) {
2735 gretl_errmsg_sprintf("svm: n_train cannot exceed the number of "
2736 "complete observations, %d", nmax);
2737 err = E_INVARG;
2738 } else {
2739 char obs1[OBSLEN], obs2[OBSLEN];
2740
2741 wrap->t2_train = wrap->t1 + ival - 1;
2742 get_obs_string(obs1, wrap->t1, dset);
2743 get_obs_string(obs2, wrap->t2_train, dset);
2744 pprintf(prn, "n_train = %d; use observations %s to %s for training\n",
2745 ival, obs1, obs2);
2746 }
2747 }
2748 }
2749
2750 if (get_optional_int(bparm, "folds", &ival, &err)) {
2751 if (ival < 2) {
2752 gretl_errmsg_sprintf("svm: invalid 'folds' value %d\n", ival);
2753 err = E_INVARG;
2754 } else {
2755 wrap->nfold = ival;
2756 }
2757 }
2758
2759 if (get_optional_unsigned(bparm, "seed", &uval, &err)) {
2760 wrap->seed = uval;
2761 }
2762
2763 if (get_optional_int(bparm, "refold", &ival, &err) && ival != 0) {
2764 wrap->flags |= W_REFOLD;
2765 }
2766
2767 if (get_optional_int(bparm, "quiet", &ival, &err) && ival != 0) {
2768 wrap->flags |= W_QUIET;
2769 }
2770
2771 if (get_optional_int(bparm, "search", &ival, &err) && ival != 0) {
2772 wrap->flags |= W_SEARCH;
2773 }
2774
2775 if (get_optional_int(bparm, "use_mpi", &ival, &err) && ival != 0) {
2776 wrap->flags |= W_MPI;
2777 }
2778
2779 if (get_optional_int(bparm, "foldvar", &ival, &err) && ival != 0) {
2780 wrap->flags |= W_FOLDVAR;
2781 }
2782
2783 if (get_optional_int(bparm, "consecutive", &ival, &err) && ival != 0) {
2784 wrap->flags |= W_CONSEC;
2785 }
2786
2787 if (get_optional_int(bparm, "yscale", &ival, &err) && ival != 0) {
2788 wrap->flags |= W_YSCALE;
2789 }
2790
2791 if (get_optional_int(bparm, "search_only", &ival, &err) && ival != 0) {
2792 wrap->flags |= W_SEARCH;
2793 if (bmod != NULL) {
2794 wrap->flags |= W_SVPARM;
2795 no_savemod = 1;
2796 }
2797 }
2798
2799 if (get_optional_int(bparm, "regcrit", &ival, &err) && ival != 0) {
2800 if (ival < REG_MSE || ival > REG_ROUND_MISS) {
2801 gretl_errmsg_sprintf("svm: invalid regcrit value %d", ival);
2802 err = E_INVARG;
2803 } else {
2804 wrap->regcrit = ival;
2805 }
2806 }
2807
2808 if (!err) {
2809 const gretl_matrix *m;
2810
2811 m = gretl_bundle_get_matrix(bparm, "grid", NULL);
2812 if (m != NULL) {
2813 err = sv_wrapper_add_grid(wrap, NULL, m);
2814 if (!err) {
2815 wrap->flags |= W_SEARCH;
2816 }
2817 }
2818 }
2819
2820 if (!err) {
2821 const char *strval;
2822
2823 strval = gretl_bundle_get_string(bparm, "ranges_outfile", NULL);
2824 if (strval != NULL && *strval != '\0') {
2825 wrap->ranges_outfile = gretl_strdup(strval);
2826 }
2827 strval = gretl_bundle_get_string(bparm, "data_outfile", NULL);
2828 if (strval != NULL && *strval != '\0') {
2829 wrap->data_outfile = gretl_strdup(strval);
2830 }
2831 strval = gretl_bundle_get_string(bparm, "ranges_infile", NULL);
2832 if (strval != NULL && *strval != '\0') {
2833 wrap->ranges_infile = gretl_strdup(strval);
2834 }
2835 strval = gretl_bundle_get_string(bparm, "model_outfile", NULL);
2836 if (strval != NULL && *strval != '\0') {
2837 wrap->model_outfile = gretl_strdup(strval);
2838 }
2839 strval = gretl_bundle_get_string(bparm, "model_infile", NULL);
2840 if (strval != NULL && *strval != '\0') {
2841 wrap->model_infile = gretl_strdup(strval);
2842 }
2843 strval = gretl_bundle_get_string(bparm, "plot", NULL);
2844 if (strval != NULL && *strval != '\0') {
2845 wrap->plot = gretl_strdup(strval);
2846 }
2847 strval = gretl_bundle_get_string(bparm, "range_format", NULL);
2848 if (strval != NULL && !strcmp(strval, "libsvm")) {
2849 wrap->flags |= W_STDFMT;
2850 }
2851 }
2852
2853 if (!err) {
2854 if (bmod != NULL && !no_savemod) {
2855 /* implicitly, a model should be saved to @bmod */
2856 wrap->flags |= W_SAVEMOD;
2857 }
2858 if ((wrap->flags & W_SEARCH) && !(wrap->flags & W_FOLDVAR) &&
2859 wrap->nfold == 0) {
2860 /* default to 5 folds */
2861 wrap->nfold = 5;
2862 }
2863 }
2864
2865 if (!err && (wrap->flags & W_FOLDVAR)) {
2866 if (wrap->flags & W_CONSEC) {
2867 gretl_errmsg_set("The foldvar and consecutive options "
2868 "are incompatible");
2869 err = E_BADOPT;
2870 } else {
2871 err = check_folds_series(list, dset, wrap, prn);
2872 }
2873 }
2874
2875 if (!err) {
2876 /* if we're still OK, fill out the libsvm @parm struct */
2877 err = set_svm_parm(parm, bparm, prn);
2878 }
2879
2880 #if 0 /* FIXME? */
2881 if (!err) {
2882 /* param search: at present we only support the RBF kernel */
2883 if ((wrap->flags & W_SEARCH) && parm->kernel_type != RBF) {
2884 gretl_errmsg_set("parameter search is only supported for "
2885 "the RBF kernel at present");
2886 err = E_INVARG;
2887 }
2888 }
2889 #endif
2890
2891 return err;
2892 }
2893
2894 /* check in advance if the incoming list variable is supposed to
2895 hold a fold variable in last place */
2896
peek_foldvar(gretl_bundle * bparm,int * fvar)2897 static int peek_foldvar (gretl_bundle *bparm, int *fvar)
2898 {
2899 int ival, err = 0;
2900
2901 if (get_optional_int(bparm, "foldvar", &ival, &err) && ival != 0) {
2902 *fvar = 1;
2903 }
2904
2905 return err;
2906 }
2907
2908 #ifdef HAVE_MPI
2909
2910 /* Here we're trying to tell if we should invoke automated
2911 local-machine MPI -- having already ascertained that it's
2912 feasible in general terms.
2913 */
2914
peek_use_mpi(gretl_bundle * bparm,gretl_bundle * bmodel,gretl_bundle * bprob,PRN * prn)2915 static int peek_use_mpi (gretl_bundle *bparm,
2916 gretl_bundle *bmodel,
2917 gretl_bundle *bprob,
2918 PRN *prn)
2919 {
2920 int ival = 0, ret = 0;
2921
2922 if (bmodel != NULL || bprob != NULL) {
2923 /* these cases are not handled yet */
2924 return 0;
2925 }
2926
2927 if (get_optional_int(bparm, "use_mpi", &ival, NULL)) {
2928 /* if the user specified @use_mpi, respect it provisionally */
2929 ret = ival;
2930 } else {
2931 /* otherwise default to using MPI subject to the check below? */
2932 ret = 1;
2933 }
2934
2935 if (ret > 0) {
2936 /* MPI is actually used internally only in parameter search,
2937 so let's peek at that.
2938 */
2939 int search = 0;
2940
2941 ival = 0;
2942 if (get_optional_int(bparm, "search", &ival, NULL) && ival > 0) {
2943 search = 1;
2944 } else if (gretl_bundle_get_matrix(bparm, "grid", NULL) != NULL) {
2945 search = 1;
2946 }
2947 if (!search && ival > 0) {
2948 pputs(prn, "svm: 'use_mpi' option ignored in the absence of search");
2949 pputc(prn, '\n');
2950 ret = 0;
2951 }
2952 }
2953
2954 return ret;
2955 }
2956
2957 #endif
2958
2959 /* wrap a couple of libsvm I/0 functions so they respect
2960 @workdir
2961 */
2962
svm_load_model_wrapper(const char * fname,int * err)2963 static sv_model *svm_load_model_wrapper (const char *fname,
2964 int *err)
2965 {
2966 sv_model *model = NULL;
2967
2968 fname = gretl_maybe_switch_dir(fname);
2969 model = svm_load_model(fname);
2970
2971 if (model == NULL) {
2972 *err = E_FOPEN;
2973 }
2974
2975 return model;
2976 }
2977
svm_save_model_wrapper(const char * fname,const sv_model * model)2978 static int svm_save_model_wrapper (const char *fname,
2979 const sv_model *model)
2980 {
2981 int err = 0;
2982
2983 fname = gretl_maybe_switch_dir(fname);
2984 err = svm_save_model(fname, model);
2985
2986 return err ? E_FOPEN : 0;
2987 }
2988
report_result(int err,PRN * prn)2989 static void report_result (int err, PRN *prn)
2990 {
2991 if (prn != NULL) {
2992 if (err) {
2993 pprintf(prn, "err = %d\n", err);
2994 } else {
2995 pputs(prn, "OK\n");
2996 }
2997 }
2998 }
2999
get_svm_ranges(const int * list,DATASET * dset,sv_wrapper * wrap,PRN * prn)3000 static int get_svm_ranges (const int *list,
3001 DATASET *dset,
3002 sv_wrapper *wrap,
3003 PRN *prn)
3004 {
3005 int err = 0;
3006
3007 if (wrap->ranges_infile != NULL) {
3008 pprintf(prn, "Getting data ranges from %s... ",
3009 wrap->ranges_infile);
3010 err = read_ranges(wrap);
3011 report_result(err, prn);
3012 gretl_flush(prn);
3013 } else {
3014 pprintf(prn, "Getting data ranges (sample = %d to %d)... ",
3015 dset->t1 + 1, dset->t2 + 1);
3016 err = get_data_ranges(list, dset, wrap);
3017 report_result(err, prn);
3018 if (!err && wrap->ranges_outfile != NULL) {
3019 err = write_ranges(wrap);
3020 report_result(err, prn);
3021 }
3022 gretl_flush(prn);
3023 }
3024
3025 return err;
3026 }
3027
3028 /* load an svm model, either from a bundle in memory
3029 or a text file in libsvm format */
3030
do_load_model(sv_wrapper * w,gretl_bundle * b,PRN * prn,int * err)3031 static sv_model *do_load_model (sv_wrapper *w,
3032 gretl_bundle *b,
3033 PRN *prn,
3034 int *err)
3035 {
3036 sv_model *model = NULL;
3037
3038 if (w->flags & W_LOADMOD) {
3039 pputs(prn, "Loading svm model from bundle... ");
3040 model = svm_model_from_bundle(b, err);
3041 } else {
3042 pprintf(prn, "Loading svm model from %s... ", w->model_infile);
3043 model = svm_load_model_wrapper(w->model_infile, err);
3044 }
3045
3046 return model;
3047 }
3048
3049 /* save an svm model, either to a bundle in memory
3050 or a text file in libsvm format */
3051
do_save_model(sv_model * model,sv_wrapper * w,gretl_bundle * b,PRN * prn)3052 static int do_save_model (sv_model *model, sv_wrapper *w,
3053 gretl_bundle *b, PRN *prn)
3054 {
3055 int err = 0;
3056
3057 if (w->flags & W_SAVEMOD) {
3058 pputs(prn, "Saving svm model to bundle... ");
3059 err = svm_model_save_to_bundle(model, b);
3060 }
3061 if (w->model_outfile != NULL) {
3062 pprintf(prn, "Saving svm model as %s... ", w->model_outfile);
3063 err = svm_save_model_wrapper(w->model_outfile, model);
3064 }
3065
3066 return err;
3067 }
3068
3069 /* here we call the libsvm parameter-checking
3070 function; if all is well and we're not in quiet
3071 mode, we print some information on the primary
3072 parameters
3073 */
3074
check_svm_params(sv_data * data,sv_parm * parm,sv_wrapper * w,PRN * prn)3075 static int check_svm_params (sv_data *data,
3076 sv_parm *parm,
3077 sv_wrapper *w,
3078 PRN *prn)
3079 {
3080 const char *msg = svm_check_parameter(data, parm);
3081 int prob_ok;
3082 int err = 0;
3083
3084 prob_ok = parm->svm_type == C_SVC ||
3085 parm->svm_type == NU_SVC ||
3086 parm->svm_type == EPSILON_SVR ||
3087 parm->svm_type == NU_SVR;
3088
3089 pputs(prn, "Checking parameter values... ");
3090 if (msg != NULL) {
3091 pputs(prn, "problem\n");
3092 gretl_errmsg_sprintf("svm: %s", msg);
3093 err = E_INVARG;
3094 } else if (parm->probability && !prob_ok) {
3095 pputs(prn, "problem\n");
3096 gretl_errmsg_set("svm: probability estimates not supported "
3097 "for this specification");
3098 err = E_INVARG;
3099 } else if (!doing_regression(parm) && w->regcrit > 0) {
3100 pputs(prn, "problem\n");
3101 gretl_errmsg_sprintf("svm: cross validation criterion %d not applicable",
3102 w->regcrit);
3103 err = E_INVARG;
3104 } else if (w->regcrit > 2 && !(w->flags & W_INTDEP)) {
3105 pputs(prn, "problem\n");
3106 gretl_errmsg_sprintf("svm: cross validation criterion %d not applicable",
3107 w->regcrit);
3108 err = E_INVARG;
3109 } else if (prn != NULL) {
3110 pputs(prn, "OK\n");
3111 pprintf(prn, "svm_type %s, kernel_type %s\n",
3112 svm_type_names[parm->svm_type],
3113 kernel_type_names[parm->kernel_type]);
3114 pprintf(prn, "initial params: C = %g", parm->C);
3115 if (uses_gamma(parm)) {
3116 pprintf(prn, ", gamma = %g", parm->gamma);
3117 }
3118 if (uses_epsilon(parm)) {
3119 pprintf(prn, ", epsilon = %g", parm->p);
3120 }
3121 if (uses_nu(parm)) {
3122 pprintf(prn, ", nu = %g", parm->nu);
3123 }
3124 pputs(prn, "\n\n");
3125 }
3126
3127 if (!err && doing_regression(parm) && w->regcrit == 0) {
3128 w->regcrit = REG_MSE;
3129 }
3130
3131 return err;
3132 }
3133
3134 /* note: if we're in auto-MPI mode, only the rank 0 process
3135 executes this function
3136 */
3137
svm_predict_main(const int * list,gretl_bundle * bmodel,double * yhat,int * yhat_written,DATASET * dset,PRN * prn,sv_parm * parm,sv_wrapper * wrap,sv_data * prob1)3138 static int svm_predict_main (const int *list,
3139 gretl_bundle *bmodel,
3140 double *yhat,
3141 int *yhat_written,
3142 DATASET *dset,
3143 PRN *prn,
3144 sv_parm *parm,
3145 sv_wrapper *wrap,
3146 sv_data *prob1)
3147 {
3148 sv_data *prob2 = NULL;
3149 sv_cell *x_space2 = NULL;
3150 sv_model *model = NULL;
3151 int do_training = 1;
3152 int err = 0;
3153
3154 gui_mode = gretl_in_gui_mode();
3155
3156 if (!err && wrap->data_outfile != NULL && wrap->rank <= 0) {
3157 err = write_problem(prob1, wrap);
3158 report_result(err, prn);
3159 }
3160
3161 if (wrap->flags & W_NOTRAIN) {
3162 do_training = 0;
3163 }
3164
3165 if (!err && loading_model(wrap)) {
3166 do_training = 0;
3167 model = do_load_model(wrap, bmodel, prn, &err);
3168 report_result(err, prn);
3169 }
3170
3171 if (!err && wrap->nfold > 0 && model == NULL) {
3172 #ifdef HAVE_MPI
3173 if (wrap->nproc > 1) {
3174 err = call_mpi_cross_validation(prob1, parm, wrap, prn);
3175 } else {
3176 err = call_cross_validation(prob1, parm, wrap, yhat + dset->t1,
3177 prn);
3178 if (!err) {
3179 *yhat_written = 1;
3180 }
3181 }
3182 #else
3183 err = call_cross_validation(prob1, parm, wrap, yhat + dset->t1, prn);
3184 if (!err) {
3185 *yhat_written = 1;
3186 }
3187 #endif
3188 if (wrap->flags & W_SEARCH) {
3189 /* continue to train using tuned params? */
3190 if (wrap->flags & W_SVPARM) {
3191 /* no, just save the tuned parms */
3192 save_results_to_bundle(parm, wrap, bmodel);
3193 wrap->predict = do_training = 0;
3194 }
3195 } else {
3196 /* not searching, we're done */
3197 wrap->predict = do_training = 0;
3198 }
3199 }
3200
3201 #if defined(HAVE_MPI) && defined(_OPENMP)
3202 if (wrap->nproc > 1) {
3203 /* we're running more than one process, but from this point
3204 only rank 0 is working, so we can assign more than one OMP
3205 thread to it without worrying about contention?
3206 */
3207 int nc = gretl_n_physical_cores();
3208
3209 omp_set_num_threads(nc);
3210 }
3211 #endif
3212
3213 if (!err && do_training) {
3214 pputs(prn, "Calling training function (this may take a while)\n");
3215 gretl_flush(prn);
3216 if (wrap->do_probs) {
3217 maybe_set_svm_seed(wrap);
3218 }
3219 model = svm_train(prob1, parm);
3220 if (model == NULL) {
3221 err = E_DATA;
3222 }
3223 pprintf(prn, "Training done, err = %d\n", err);
3224 gretl_flush(prn);
3225 }
3226
3227 if (model != NULL && saving_model(wrap)) {
3228 err = do_save_model(model, wrap, bmodel, prn);
3229 report_result(err, prn);
3230 }
3231
3232 if (!err && wrap->predict > 0) {
3233 /* Now do some prediction */
3234 int training = (wrap->t2_train > 0);
3235 int T_os = -1;
3236
3237 err = real_svm_predict(yhat, prob1, wrap, model, training, dset, prn);
3238 if (err) {
3239 goto bailout;
3240 }
3241 *yhat_written = 1;
3242 dset->t2 = wrap->t2;
3243 if (training && wrap->predict > 1) {
3244 T_os = dset->t2 - wrap->t2_train;
3245 }
3246 if (T_os >= 10) {
3247 /* If we have some out-of-sample data, go
3248 ahead and predict out of sample.
3249 */
3250 int T;
3251
3252 dset->t1 = wrap->t2_train + 1;
3253 T = sample_size(dset);
3254 pprintf(prn, "Found %d testing observations\n", T);
3255 err = check_test_data(dset, wrap->ranges, wrap->k);
3256 if (!err) {
3257 prob2 = gretl_sv_data_alloc(T, wrap->k, &x_space2, &err);
3258 }
3259 if (!err) {
3260 sv_data_fill(prob2, x_space2, wrap, list, dset, 2);
3261 err = real_svm_predict(yhat, prob2, wrap, model, 0, dset, prn);
3262 }
3263 }
3264 }
3265
3266 bailout:
3267
3268 gretl_sv_data_destroy(prob2, x_space2);
3269 if (wrap->flags & W_LOADMOD) {
3270 gretl_destroy_svm_model(model);
3271 } else {
3272 svm_free_and_destroy_model(&model);
3273 }
3274 svm_prn = NULL;
3275
3276 return err;
3277 }
3278
sv_trim_missing(int * list,int fvar,DATASET * dset)3279 static int sv_trim_missing (int *list, int fvar, DATASET *dset)
3280 {
3281 int t1 = dset->t1;
3282 int t2 = dset->t2;
3283 int T, nmiss = 0;
3284 int err = 0;
3285
3286 if (fvar) {
3287 /* Temporary adjustment, since we don't require that
3288 the folds variable (in last place) has valid values
3289 at _all_ observations, but only in the training set;
3290 and we'll check for that later.
3291 */
3292 list[0] -= 1;
3293 }
3294
3295 list_adjust_sample(list, &t1, &t2, dset, &nmiss);
3296 T = t2 - t1 + 1 - nmiss;
3297
3298 if (fvar) {
3299 /* reset to the original value */
3300 list[0] += 1;
3301 }
3302
3303 if (T < sample_size(dset)) {
3304 /* we can't handle NAs for the dependent variable within the
3305 (possibly reduced) sample range
3306 */
3307 int t, yno = list[1];
3308
3309 for (t=t1; t<=t2 && !err; t++) {
3310 if (na(dset->Z[yno][t])) {
3311 gretl_errmsg_sprintf("Dependent variable is NA at obs %d", t+1);
3312 err = E_MISSDATA;
3313 }
3314 }
3315 if (!err) {
3316 fprintf(stderr, "sv_trim_missing: t1=%d, t2=%d, excluding %d observations\n",
3317 t1+1, t2+1, sample_size(dset) - T);
3318 }
3319 }
3320
3321 if (!err) {
3322 if (T > list[0]) {
3323 dset->t1 = t1;
3324 dset->t2 = t2;
3325 } else {
3326 err = E_MISSDATA;
3327 }
3328 }
3329
3330 return err;
3331 }
3332
3333 /* If w->seed was automatic, and was actually used, save it
3334 to the incoming bundle in case it may be of interest.
3335 */
3336
maybe_save_auto_seed(sv_wrapper * w,gretl_bundle * b)3337 static void maybe_save_auto_seed (sv_wrapper *w, gretl_bundle *b)
3338 {
3339 if (gretl_bundle_has_key(b, "seed")) {
3340 return; /* no, an incoming seed was specified */
3341 } else if (gretl_bundle_has_key(b, "foldvar") ||
3342 gretl_bundle_has_key(b, "consecutive")) {
3343 return; /* no, randomization of folds not employed */
3344 } else if (gretl_bundle_has_key(b, "folds") ||
3345 gretl_bundle_has_key(b, "search") ||
3346 gretl_bundle_has_key(b, "grid")) {
3347 /* OK, randomized folds employed */
3348 gretl_bundle_set_unsigned(b, "autoseed", w->seed);
3349 }
3350 }
3351
3352 #ifdef HAVE_MPI
3353
forward_to_gretlmpi(const int * list,gretl_bundle * bparams,DATASET * dset,int np,double * yhat,int * got_yhat,PRN * prn)3354 static int forward_to_gretlmpi (const int *list,
3355 gretl_bundle *bparams,
3356 DATASET *dset,
3357 int np,
3358 double *yhat,
3359 int *got_yhat,
3360 PRN *prn)
3361 {
3362 gchar *fname;
3363 int err = 0;
3364
3365 /* write data file for gretlmpi */
3366 fname = gretl_make_dotpath("svmtmp.gdt");
3367 err = write_data(fname, (int *) list, dset, OPT_NONE, NULL);
3368 g_free(fname);
3369
3370 if (!err) {
3371 /* write parameter bundle for gretlmpi */
3372 err = gretl_bundle_write_to_file(bparams, "svmtmp.xml", 1);
3373 }
3374
3375 if (!err) {
3376 /* compose and execute MPI script */
3377 if (np > 1) {
3378 set_optval_int(MPI, OPT_N, np);
3379 err = foreign_start(MPI, NULL, OPT_N, prn);
3380 } else {
3381 err = foreign_start(MPI, NULL, OPT_NONE, prn);
3382 }
3383 if (!err) {
3384 foreign_append("open @dotdir/svmtmp.gdt -q", MPI);
3385 foreign_append("bundle parms = bread(\"@dotdir/svmtmp.xml\")", MPI);
3386 foreign_append("list All = dataset", MPI);
3387 foreign_append("yhat_svm = svm(All, parms)", MPI);
3388 foreign_append("if $mpirank < 1", MPI);
3389 foreign_append(" mwrite({yhat_svm}, \"@dotdir/svmtmp.mat\")", MPI);
3390 foreign_append("endif", MPI);
3391 err = foreign_execute(dset, OPT_L, prn); /* note: --local */
3392 }
3393 }
3394
3395 if (!err) {
3396 gretl_matrix *m;
3397 int s, t;
3398
3399 m = gretl_matrix_read_from_file("svmtmp.mat", 1, &err);
3400 if (m == NULL) {
3401 err = E_DATA;
3402 } else {
3403 if (m->rows == sample_size(dset)) {
3404 for (t=dset->t1, s=0; t<=dset->t2; t++, s++) {
3405 yhat[t] = m->val[s];
3406 }
3407 *got_yhat = 1;
3408 } else {
3409 err = E_DATA;
3410 }
3411 gretl_matrix_free(m);
3412 }
3413 }
3414
3415 return err;
3416 }
3417
3418 #endif
3419
gretl_svm_driver(const int * list,gretl_bundle * bparams,gretl_bundle * bmodel,gretl_bundle * bprob,double * yhat,int * yhat_written,DATASET * dset,PRN * inprn)3420 int gretl_svm_driver (const int *list,
3421 gretl_bundle *bparams,
3422 gretl_bundle *bmodel,
3423 gretl_bundle *bprob,
3424 double *yhat,
3425 int *yhat_written,
3426 DATASET *dset,
3427 PRN *inprn)
3428 {
3429 sv_parm parm;
3430 sv_wrapper wrap;
3431 sv_data *prob = NULL;
3432 sv_cell *x_space = NULL;
3433 PRN *prn = inprn;
3434 int save_t1 = dset->t1;
3435 int save_t2 = dset->t2;
3436 int fvar = 0, lmin = 2;
3437 int T, err = 0;
3438
3439 #ifdef HAVE_MPI
3440 if (gretl_mpi_rank() > 0) {
3441 /* cut out chatter from ranks other than 0 */
3442 prn = NULL;
3443 } else if (auto_mpi_ok()) {
3444 int np = peek_use_mpi(bparams, bmodel, bprob, prn);
3445
3446 if (np > 0) {
3447 return forward_to_gretlmpi(list, bparams, dset, np,
3448 yhat, yhat_written, prn);
3449 }
3450 }
3451 #endif
3452
3453 /* look ahead to see if we're supposed to be getting
3454 a "folds" variable at the end of @list */
3455 err = peek_foldvar(bparams, &fvar);
3456 if (err) {
3457 return err;
3458 } else if (fvar) {
3459 lmin = 3;
3460 }
3461
3462 /* general checking and initialization */
3463 if (list == NULL || list[0] < lmin) {
3464 gretl_errmsg_set("svm: invalid list argument");
3465 err = E_INVARG;
3466 } else {
3467 /* adjust the sample range for any NAs fore or aft */
3468 err = sv_trim_missing((int *) list, fvar, dset);
3469 if (!err) {
3470 sv_wrapper_init(&wrap, dset);
3471 err = read_params_bundle(bparams, bmodel, &wrap, &parm,
3472 list, dset, prn);
3473 }
3474 }
3475
3476 if (err) {
3477 /* restore incoming sample and get out */
3478 dset->t1 = save_t1;
3479 dset->t2 = save_t2;
3480 return err;
3481 }
3482
3483 #ifdef HAVE_MPI
3484 wrap.nproc = gretl_mpi_n_processes();
3485 if (wrap.nproc > 1) {
3486 /* we're actually in MPI mode */
3487 wrap.rank = gretl_mpi_rank();
3488 worker_prn = NULL;
3489 if (wrap.rank > 0) {
3490 /* let rank 0 do (almost) all the talking */
3491 if (!(wrap.flags & W_QUIET)) {
3492 worker_prn = inprn;
3493 wrap.flags |= W_QUIET;
3494 }
3495 }
3496 }
3497 #endif
3498
3499 if (wrap.flags & W_QUIET) {
3500 svm_set_print_string_function(gretl_libsvm_noprint);
3501 prn = NULL;
3502 } else {
3503 svm_prn = prn;
3504 svm_set_print_string_function(gretl_libsvm_print);
3505 }
3506
3507 if (wrap.t2_train > 0) {
3508 /* adjust sample temporarily */
3509 dset->t2 = wrap.t2_train;
3510 }
3511 T = sample_size(dset);
3512
3513 err = get_svm_ranges(list, dset, &wrap, prn);
3514
3515 if (!err) {
3516 wrap.k = (int) gretl_matrix_get(wrap.ranges, 0, 2) - 1;
3517 pputs(prn, "Allocating problem space... ");
3518 prob = gretl_sv_data_alloc(T, wrap.k, &x_space, &err);
3519 report_result(err, prn);
3520 }
3521 gretl_flush(prn);
3522
3523 if (!err) {
3524 /* fill out the "problem" data */
3525 pputs(prn, "Scaling and transcribing data... ");
3526 sv_data_fill(prob, x_space, &wrap, list, dset, 1);
3527 if (parm.svm_type < 0) {
3528 parm.svm_type = wrap.auto_type;
3529 }
3530 if (parm.gamma == 0) {
3531 parm.gamma = 1.0 / wrap.k;
3532 }
3533 pputs(prn, "OK\n");
3534 /* we're now ready to run a full check on @parm */
3535 err = check_svm_params(prob, &parm, &wrap, prn);
3536 if (!err && parm.probability) {
3537 wrap.do_probs = 1;
3538 }
3539 }
3540
3541 if (!err && (wrap.flags & W_SEARCH)) {
3542 err = do_search_prep(prob, &parm, &wrap, dset, prn);
3543 }
3544
3545 if (err) {
3546 goto getout;
3547 }
3548
3549 if (wrap.nproc < 2 || !(wrap.flags & W_SEARCH)) {
3550 /* not doing parameter search via MPI */
3551 err = svm_predict_main(list, bmodel,
3552 yhat, yhat_written,
3553 dset, prn, &parm,
3554 &wrap, prob);
3555 goto getout;
3556 }
3557
3558 #ifdef HAVE_MPI
3559 if (wrap.rank == 0) {
3560 err = svm_predict_main(list, bmodel,
3561 yhat, yhat_written,
3562 dset, prn, &parm,
3563 &wrap, prob);
3564 } else {
3565 err = call_mpi_cross_validation(prob, &parm, &wrap, NULL);
3566 }
3567 #endif
3568
3569 getout:
3570
3571 if (!err && bprob != NULL) {
3572 save_probs_to_bundle(&wrap, bprob);
3573 }
3574
3575 if (!err && bparams != NULL) {
3576 maybe_save_auto_seed(&wrap, bparams);
3577 }
3578
3579 gretl_sv_data_destroy(prob, x_space);
3580 svm_destroy_param(&parm);
3581 sv_wrapper_free(&wrap);
3582
3583 dset->t1 = save_t1;
3584 dset->t2 = save_t2;
3585
3586 return err;
3587 }
3588