1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2017 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /* gretl interface to libsvm for machine learning */
21 
22 #include "libgretl.h"
23 #include "libset.h"
24 #include "gretl_mt.h"
25 #include "version.h"
26 
27 #ifdef HAVE_MPI
28 # include "gretl_mpi.h"
29 # include "gretl_foreign.h"
30 # include "matrix_extra.h"
31 #endif
32 
33 #ifdef _OPENMP
34 # include <omp.h>
35 #endif
36 
37 #include "svmlib.h"
38 
39 #define DATA_DEBUG 0
40 
41 typedef struct svm_problem sv_data;
42 typedef struct svm_node sv_cell;
43 typedef struct svm_model sv_model;
44 typedef struct svm_parameter sv_parm;
45 typedef struct sv_wrapper_ sv_wrapper;
46 typedef struct sv_grid_ sv_grid;
47 typedef struct grid_row_ grid_row;
48 
49 /* the following need to be in sync with svmlib.h */
50 
51 static const char *svm_type_names[] = {
52     "C-SVC", "nu-SVC", "one-class", "eps-SVR", "nu-SVR",
53     "C-rnk", NULL
54 };
55 
56 static const char *kernel_type_names[] = {
57     "linear", "polynomial", "RBF", "sigmoid",
58     "stump", "perc", "laplace", "expo", NULL
59 };
60 
61 enum {
62     W_SAVEMOD = 1 << 0, /* saving a model? */
63     W_LOADMOD = 1 << 1, /* loading a model? */
64     W_QUIET   = 1 << 2, /* quiet operation? */
65     W_SEARCH  = 1 << 3, /* doing parameter search? */
66     W_STDFMT  = 1 << 4, /* use plain libsvm format for ranges? */
67     W_SVPARM  = 1 << 5, /* saving tuned params to bundle? */
68     W_FOLDVAR = 1 << 6, /* caller-supplied folds variable? */
69     W_YSCALE  = 1 << 7, /* scaling the dependent var? */
70     W_CONSEC  = 1 << 8, /* using consecutive folds? */
71     W_REFOLD  = 1 << 9, /* folds differ across x-validation calls? */
72     W_INTDEP  = 1 << 10, /* dependent variable is integer-valued */
73     W_NOTRAIN = 1 << 11, /* not doing any training */
74     W_MPI     = 1 << 12  /* use MPI for cross validation */
75 };
76 
77 enum {
78     REG_MSE = 1,
79     REG_MAD,
80     REG_ROUND_MAD,
81     REG_ROUND_MISS
82 };
83 
84 struct sv_wrapper_ {
85     int auto_type;
86     int flags;
87     int scaling;
88     int t1;
89     int t2;
90     int t2_train;
91     int k;
92     int nfold;    /* number of folds for x-validation */
93     int predict;  /* are we doing (how much) prediction? */
94     int nproc;    /* number of processes (MPI only) */
95     int rank;     /* "this" MPI rank, if applicable */
96     int regcrit;  /* regression optimality criterion */
97     int do_probs; /* are we doing probability estimation? */
98     double ymin;  /* min value of dependent variable */
99     double ymax;  /* max value of dependent variable */
100     gretl_matrix *ranges;
101     char *ranges_outfile;
102     char *ranges_infile;
103     char *model_outfile;
104     char *model_infile;
105     char *data_outfile;
106     char foldname[VNAMELEN];
107     sv_grid *grid;
108     char *plot;
109     gretl_matrix *xdata;
110     gretl_matrix *Ptrain;
111     gretl_matrix *Ptest;
112     double svr_sigma;
113     int *flist;  /* array of folds IDs, if applicable */
114     int *fsize;  /* array of fold sizes, if applicable */
115     unsigned seed;
116 };
117 
118 struct sv_parm_info {
119     const char *key;
120     GretlType type;
121 };
122 
123 enum {
124     G_C,
125     G_g,
126     G_p
127 };
128 
129 struct grid_row_ {
130     double start;
131     double stop;
132     double step;
133 };
134 
135 struct sv_grid_ {
136     grid_row row[3];
137     int null[3];
138     int n[3];
139     int linear[3];
140 };
141 
142 #define N_PARMS 15
143 
144 #define doing_regression(p) (p->svm_type == EPSILON_SVR || p->svm_type == NU_SVR)
145 
146 static int gui_mode;
147 
148 #ifdef HAVE_MPI
149 static PRN *worker_prn;
150 #endif
151 
sv_wrapper_init(sv_wrapper * w,const DATASET * dset)152 static void sv_wrapper_init (sv_wrapper *w, const DATASET *dset)
153 {
154     w->auto_type = EPSILON_SVR;
155     w->flags = 0;
156     w->scaling = 1;
157     w->t1 = dset->t1;
158     w->t2 = dset->t2;
159     w->t2_train = 0;
160     w->k = 0;
161     w->nfold = 0;
162     w->predict = 2;
163     w->nproc = 0;
164     w->rank = -1;
165     w->regcrit = 0;
166     w->do_probs = 0;
167     w->ymin = 0.0;
168     w->ymax = 0.0;
169     w->ranges = NULL;
170     w->ranges_outfile = NULL;
171     w->ranges_infile = NULL;
172     w->model_outfile = NULL;
173     w->model_infile = NULL;
174     w->data_outfile = NULL;
175     w->foldname[0] = '\0';
176     w->grid = NULL;
177     w->plot = NULL;
178     w->xdata = NULL;
179     w->Ptrain = NULL;
180     w->Ptest = NULL;
181     w->svr_sigma = NADBL;
182     w->flist = NULL;
183     w->fsize = NULL;
184     w->seed = time(NULL);
185 }
186 
sv_wrapper_free(sv_wrapper * w)187 static void sv_wrapper_free (sv_wrapper *w)
188 {
189     gretl_matrix_free(w->ranges);
190     free(w->ranges_outfile);
191     free(w->ranges_infile);
192     free(w->model_outfile);
193     free(w->model_infile);
194     free(w->data_outfile);
195     free(w->grid);
196     free(w->plot);
197     gretl_matrix_free(w->xdata);
198     gretl_matrix_free(w->Ptrain);
199     gretl_matrix_free(w->Ptest);
200     free(w->flist);
201     free(w->fsize);
202 }
203 
maybe_set_svm_seed(const sv_wrapper * w)204 static void maybe_set_svm_seed (const sv_wrapper *w)
205 {
206     if (w->flags & W_REFOLD) {
207 	/* set the seed just once */
208 	static int set;
209 
210 	if (!set) {
211 	    gretl_alt_rand_set_seed(w->seed);
212 	    set = 1;
213 	}
214     } else {
215 	/* set the seed on each x-validation run */
216 	gretl_alt_rand_set_seed(w->seed);
217     }
218 }
219 
loading_model(const sv_wrapper * w)220 static int loading_model (const sv_wrapper *w)
221 {
222     return (w->flags & W_LOADMOD) || w->model_infile != NULL;
223 }
224 
saving_model(const sv_wrapper * w)225 static int saving_model (const sv_wrapper *w)
226 {
227     return (w->flags & W_SAVEMOD) || w->model_outfile != NULL;
228 }
229 
uses_epsilon(const sv_parm * parm)230 static int uses_epsilon (const sv_parm *parm)
231 {
232     return parm->svm_type == EPSILON_SVR;
233 }
234 
uses_gamma(const sv_parm * parm)235 static int uses_gamma (const sv_parm *parm)
236 {
237     return parm->kernel_type == POLY ||
238 	parm->kernel_type == RBF ||
239 	parm->kernel_type == SIGMOID ||
240 	parm->kernel_type == LAPLACE ||
241 	parm->kernel_type == EXPO;
242 }
243 
uses_coef0(const sv_parm * parm)244 static int uses_coef0 (const sv_parm *parm)
245 {
246     return parm->kernel_type == POLY ||
247 	parm->kernel_type == SIGMOID ||
248 	parm->kernel_type == STUMP ||
249 	parm->kernel_type == PERC;
250 }
251 
uses_nu(const sv_parm * parm)252 static int uses_nu (const sv_parm *parm)
253 {
254     return parm->svm_type == NU_SVC ||
255 	parm->svm_type == NU_SVR || parm->svm_type == ONE_CLASS;
256 }
257 
set_sv_parm_defaults(sv_parm * parm)258 static void set_sv_parm_defaults (sv_parm *parm)
259 {
260     parm->svm_type = -1; /* mark as unknown for now */
261     parm->kernel_type = RBF;
262     parm->degree = 3;   /* for polynomial */
263     parm->gamma = 0;    /* poly/RBF/sigmoid: default 1.0 / num_features */
264     parm->coef0 = 0;    /* for use in kernel function */
265 
266     /* training-only variables */
267     parm->cache_size = 1024;   /* cache size in MB */
268     parm->eps = 0.001;         /* stopping criterion */
269     parm->C = 1;               /* cost: for C_SVC, EPSILON_SVR and NU_SVR */
270     parm->nr_weight = 0;       /* for C_SVC */
271     parm->weight_label = NULL; /* for C_SVC */
272     parm->weight = NULL;       /* for C_SVC */
273     parm->nu = 0.5;            /* for NU_SVC, ONE_CLASS, and NU_SVR */
274     parm->p = 0.1;             /* for EPSILON_SVR */
275     parm->shrinking = 1;       /* use shrinking heuristics */
276     parm->probability = 0;     /* do probability estimates */
277 }
278 
279 static struct sv_parm_info pinfo[N_PARMS] = {
280     { "svm_type",     GRETL_TYPE_INT },
281     { "kernel_type",  GRETL_TYPE_INT },
282     { "degree",       GRETL_TYPE_INT },
283     { "gamma",        GRETL_TYPE_DOUBLE },
284     { "coef0",        GRETL_TYPE_DOUBLE },
285     { "cachesize",    GRETL_TYPE_DOUBLE },
286     { "toler",        GRETL_TYPE_DOUBLE },
287     { "C",            GRETL_TYPE_DOUBLE },
288     { "nr_weight",    GRETL_TYPE_INT },
289     { "weight_label", GRETL_TYPE_SERIES },
290     { "weight",       GRETL_TYPE_SERIES },
291     { "nu",           GRETL_TYPE_DOUBLE },
292     { "epsilon",      GRETL_TYPE_DOUBLE },
293     { "shrinking",    GRETL_TYPE_BOOL },
294     { "probability",  GRETL_TYPE_BOOL }
295 };
296 
is_sv_parm(const char * s)297 static int is_sv_parm (const char *s)
298 {
299     int i;
300 
301     for (i=0; i<N_PARMS; i++) {
302 	if (!strcmp(s, pinfo[i].key)) {
303 	    return 1;
304 	}
305     }
306 
307     return 0;
308 }
309 
type_value_from_string(const char * s,int i,PRN * prn,int * err)310 static int type_value_from_string (const char *s, int i,
311 				   PRN *prn, int *err)
312 {
313     int j;
314 
315     if (i == 0) {
316 	/* SVM type */
317 	for (j=0; svm_type_names[j] != NULL; j++) {
318 	    if (!g_ascii_strcasecmp(s, svm_type_names[j])) {
319 		return j;
320 	    }
321 	}
322 	pprintf(prn, "%s: unrecognized SVM type\n", s);
323 	*err = E_INVARG;
324     } else {
325 	/* kernel type */
326 	for (j=0; kernel_type_names[j] != NULL; j++) {
327 	    if (!g_ascii_strcasecmp(s, kernel_type_names[j])) {
328 		return j;
329 	    }
330 	}
331 	pprintf(prn, "%s: unrecognized kernel type\n", s);
332 	*err = E_INVARG;
333     }
334 
335     return -1;
336 }
337 
set_or_store_sv_parm(sv_parm * parm,gretl_bundle * b,int store,PRN * prn)338 static int set_or_store_sv_parm (sv_parm *parm, gretl_bundle *b,
339 				 int store, PRN *prn)
340 {
341     void *elem[N_PARMS] = {
342 	&parm->svm_type,
343 	&parm->kernel_type,
344 	&parm->degree,
345 	&parm->gamma,
346 	&parm->coef0,
347 	&parm->cache_size,
348 	&parm->eps,
349 	&parm->C,
350 	&parm->nr_weight,
351 	&parm->weight_label,
352 	&parm->weight,
353 	&parm->nu,
354 	&parm->p,
355 	&parm->shrinking,
356 	&parm->probability
357     };
358     int got_kspec = 0;
359     int i, ival;
360     double xval;
361     int err = 0;
362 
363     if (store) {
364 	/* we're supposed to store @parm in @b */
365 	for (i=0; i<N_PARMS && !err; i++) {
366 	    if (pinfo[i].type == GRETL_TYPE_DOUBLE) {
367 		xval = *(double *) elem[i];
368 		gretl_bundle_set_scalar(b, pinfo[i].key, xval);
369 	    } else {
370 		ival = *(int *) elem[i];
371 		gretl_bundle_set_int(b, pinfo[i].key, ival);
372 	    }
373 	}
374 	/* done */
375 	return 0;
376     }
377 
378     set_sv_parm_defaults(parm);
379 
380     for (i=0; i<N_PARMS && !err; i++) {
381 	if (gretl_bundle_has_key(b, pinfo[i].key)) {
382 	    if (i < 2) {
383 		/* types of svm and kernel: int or string */
384 		GretlType type = 0;
385 		void *ptr = gretl_bundle_get_data(b, pinfo[i].key,
386 						  &type, NULL, &err);
387 		if (type == GRETL_TYPE_INT) {
388 		    *(int *) elem[i] = *(int *) ptr;
389 		} else if (type == GRETL_TYPE_DOUBLE) {
390 		    *(int *) elem[i] = *(double *) ptr;
391 		} else if (type == GRETL_TYPE_STRING) {
392 		    *(int *) elem[i] = type_value_from_string(ptr, i, prn, &err);
393 		} else {
394 		    fprintf(stderr, "parameter %d, bad option type\n", i);
395 		    err = E_TYPES;
396 		}
397 		if (i == 1) {
398 		    got_kspec = 1;
399 		}
400 	    } else if (i >= 8 && i <= 10) {
401 		pputs(prn, "Sorry, svm weighting not handled yet\n");
402 		err = E_INVARG;
403 	    } else if (pinfo[i].type == GRETL_TYPE_DOUBLE) {
404 		xval = gretl_bundle_get_scalar(b, pinfo[i].key, &err);
405 		if (!err) {
406 		    *(double *) elem[i] = xval;
407 		}
408 	    } else if (pinfo[i].type == GRETL_TYPE_INT ||
409 		       pinfo[i].type == GRETL_TYPE_BOOL) {
410 		ival = gretl_bundle_get_int(b, pinfo[i].key, &err);
411 		if (!err) {
412 		    if (pinfo[i].type == GRETL_TYPE_BOOL) {
413 			*(int *) elem[i] = (ival != 0);
414 		    } else {
415 			*(int *) elem[i] = ival;
416 		    }
417 		}
418 	    }
419 	}
420     }
421 
422     if (!err && !got_kspec) {
423 	/* the user didn't specify a kernel type: we default to
424 	   Gaussian RBF above, but PERC seems to work better for
425 	   ranking
426 	*/
427 	if (parm->svm_type == C_RNK) {
428 	    parm->kernel_type = PERC;
429 	}
430     }
431 
432     return err;
433 }
434 
set_svm_parm(sv_parm * parm,gretl_bundle * b,PRN * prn)435 static int set_svm_parm (sv_parm *parm, gretl_bundle *b,
436 			 PRN *prn)
437 {
438     return set_or_store_sv_parm(parm, b, 0, prn);
439 }
440 
441 /* printing apparatus */
442 
443 static PRN *svm_prn;
444 
gretl_libsvm_print(const char * s)445 static void gretl_libsvm_print (const char *s)
446 {
447     if (svm_prn != NULL) {
448 	pputs(svm_prn, s);
449 	gretl_flush(svm_prn);
450     } else {
451 	fputs(s, stdout);
452 	fflush(stdout);
453     }
454 }
455 
gretl_libsvm_noprint(const char * s)456 static void gretl_libsvm_noprint (const char *s)
457 {
458     return;
459 }
460 
461 /* end printing apparatus */
462 
gretl_destroy_svm_model(sv_model * model)463 static void gretl_destroy_svm_model (sv_model *model)
464 {
465     if (model != NULL) {
466 	if (model->l > 0 && model->SV != NULL &&
467 	    model->SV[0] != NULL) {
468 	    free(model->SV[0]);
469 	}
470 	if (model->sv_coef != NULL) {
471 	    doubles_array_free(model->sv_coef, model->nr_class-1);
472 	}
473 	free(model->SV);
474 	free(model->rho);
475 	free(model->label);
476 	free(model->probA);
477 	free(model->probB);
478 	free(model->sv_indices);
479 	free(model->nSV);
480 	free(model);
481     }
482 }
483 
bundle_as_matrix(gretl_bundle * b,const char * key,double * xvals,int n)484 static int bundle_as_matrix (gretl_bundle *b, const char *key,
485 			     double *xvals, int n)
486 {
487     gretl_matrix *m = gretl_matrix_alloc(n, 1);
488 
489     if (m == NULL) {
490 	return E_ALLOC;
491     } else {
492 	memcpy(m->val, xvals, n * sizeof *xvals);
493 	gretl_bundle_donate_data(b, key, m, GRETL_TYPE_MATRIX, 0);
494 	return 0;
495     }
496 }
497 
bundle_as_list(gretl_bundle * b,const char * key,int * ivals,int n)498 static int bundle_as_list (gretl_bundle *b, const char *key,
499 			   int *ivals, int n)
500 {
501     int *list = gretl_list_new(n);
502 
503     if (list == NULL) {
504 	return E_ALLOC;
505     } else {
506 	memcpy(list + 1, ivals, n * sizeof *ivals);
507 	gretl_bundle_donate_data(b, key, list, GRETL_TYPE_LIST, 0);
508 	return 0;
509     }
510 }
511 
svm_model_save_to_bundle(const sv_model * model,gretl_bundle * b)512 static int svm_model_save_to_bundle (const sv_model *model,
513 				     gretl_bundle *b)
514 {
515     const sv_parm *parm = &model->param;
516     gretl_matrix *m = NULL;
517     int i, j, nc, l, ntr;
518     int err = 0;
519 
520     gretl_bundle_void_content(b);
521 
522     gretl_bundle_set_int(b, "svm_type", parm->svm_type);
523     gretl_bundle_set_int(b, "kernel_type", parm->kernel_type);
524 
525     if (parm->kernel_type == POLY) {
526 	gretl_bundle_set_int(b, "degree", parm->degree);
527     }
528 
529     if (uses_gamma(parm)) {
530 	gretl_bundle_set_scalar(b, "gamma", parm->gamma);
531     }
532 
533     if (uses_coef0(parm)) {
534 	gretl_bundle_set_scalar(b, "coef0", parm->coef0);
535     }
536 
537     nc = model->nr_class;
538     l = model->l;
539 
540     gretl_bundle_set_int(b, "nr_class", nc);
541     gretl_bundle_set_int(b, "l", l);
542 
543     /* number of triangular elements */
544     ntr = nc * (nc - 1) / 2;
545 
546     bundle_as_matrix(b, "rho", model->rho, ntr);
547 
548     if (model->label != NULL) {
549 	bundle_as_list(b, "label", model->label, nc);
550     }
551     if (model->probA != NULL) {
552 	bundle_as_matrix(b, "probA", model->probA, ntr);
553     }
554     if (model->probB != NULL) {
555 	bundle_as_matrix(b, "probB", model->probB, ntr);
556     }
557     if (model->nSV != NULL) {
558 	bundle_as_list(b, "nr_sv", model->nSV, nc);
559     }
560 
561     /* store the SVs */
562 
563     m = gretl_matrix_alloc(l, nc - 1);
564     if (m != NULL) {
565 	for (j=0; j<nc-1; j++) {
566 	    /* j = class index */
567 	    for (i=0; i<l; i++) {
568 		/* i = row index */
569 		gretl_matrix_set(m, i, j, model->sv_coef[j][i]);
570 	    }
571 	}
572 	gretl_bundle_donate_data(b, "sv_coef", m,
573 				 GRETL_TYPE_MATRIX, 0);
574     }
575 
576     {
577 	gretl_array *aidx, *avec = NULL;
578 	gretl_matrix *vec;
579 	int *idx;
580 	int n_elements = 0;
581 
582 	aidx = gretl_array_new(GRETL_TYPE_LISTS, l, &err);
583 	if (!err) {
584 	    avec = gretl_array_new(GRETL_TYPE_MATRICES, l, &err);
585 	}
586 
587 	for (i=0; i<l && !err; i++) {
588 	    const sv_cell *p = model->SV[i];
589 	    int k, ni = 0;
590 
591 	    /* count the nodes on this row */
592 	    while (p->index != -1) {
593 		ni++;
594 		p++;
595 	    }
596 	    idx = gretl_list_new(ni);
597 	    vec = gretl_matrix_alloc(1, ni);
598 	    if (idx == NULL || vec == NULL) {
599 		err = E_ALLOC;
600 		break;
601 	    }
602 	    p = model->SV[i]; /* reset */
603 	    for (k=0; k<ni; k++) {
604 		idx[k+1] = p[k].index;
605 		vec->val[k] = p[k].value;
606 	    }
607 	    gretl_array_set_list(aidx, i, idx, 0);
608 	    gretl_array_set_matrix(avec, i, vec, 0);
609 	    n_elements += ni + 1;
610 	}
611 
612 	if (err) {
613 	    gretl_array_destroy(aidx);
614 	    gretl_array_destroy(avec);
615 	} else {
616 	    gretl_bundle_set_int(b, "n_elements", n_elements);
617 	    gretl_bundle_donate_data(b, "SV_indices", aidx,
618 				     GRETL_TYPE_ARRAY, 0);
619 	    gretl_bundle_donate_data(b, "SV_vecs", avec,
620 				     GRETL_TYPE_ARRAY, 0);
621 	}
622     }
623 
624     if (err) {
625 	gretl_bundle_void_content(b);
626     }
627 
628     return err;
629 }
630 
save_results_to_bundle(const sv_parm * parm,sv_wrapper * w,gretl_bundle * b)631 static void save_results_to_bundle (const sv_parm *parm,
632 				    sv_wrapper *w,
633 				    gretl_bundle *b)
634 {
635     if (w->xdata != NULL) {
636 	int ns = w->xdata->cols;
637 	char **S = strings_array_new(ns);
638 
639 	if (S != NULL) {
640 	    S[0] = gretl_strdup("C");
641 	    S[1] = gretl_strdup("gamma");
642 	    if (ns == 4) {
643 		const char *s = uses_epsilon(parm) ? "epsilon" : "nu";
644 
645 		S[2] = gretl_strdup(s);
646 		S[3] = gretl_strdup("score");
647 	    } else {
648 		S[2] = gretl_strdup("score");
649 	    }
650 	    gretl_matrix_set_colnames(w->xdata, S);
651 	}
652 	gretl_bundle_donate_data(b, "xvalid_results", w->xdata,
653 				 GRETL_TYPE_MATRIX, 0);
654 	w->xdata = NULL;
655     }
656 }
657 
658 /* If we did probability estimation we should have results
659    in w->Ptrain and/or w->Ptest (in the classification
660    case) or a single Laplace scale value in w->svr_sigma
661    (in the regression case). We now stuff this info into
662    the courier bundle supplied by the caller.
663 */
664 
save_probs_to_bundle(sv_wrapper * w,gretl_bundle * b)665 static void save_probs_to_bundle (sv_wrapper *w,
666 				  gretl_bundle *b)
667 {
668     if (w->Ptrain != NULL) {
669 	gretl_bundle_donate_data(b, "Ptrain", w->Ptrain,
670 				 GRETL_TYPE_MATRIX, 0);
671 	w->Ptrain = NULL;
672     }
673     if (w->Ptest != NULL) {
674 	gretl_bundle_donate_data(b, "Ptest", w->Ptest,
675 				 GRETL_TYPE_MATRIX, 0);
676 	w->Ptest = NULL;
677     }
678     if (!na(w->svr_sigma)) {
679 	gretl_bundle_set_scalar(b, "svr_sigma", w->svr_sigma);
680     }
681 }
682 
array_from_bundled_matrix(gretl_bundle * b,const char * key,int required,int * err)683 static double *array_from_bundled_matrix (gretl_bundle *b,
684 					  const char *key,
685 					  int required,
686 					  int *err)
687 {
688     double *ret = NULL;
689 
690     if (*err) return NULL;
691 
692     if (gretl_bundle_has_key(b, key)) {
693 	gretl_matrix *m = gretl_bundle_get_matrix(b, key, err);
694 
695 	if (m != NULL) {
696 	    int n = m->rows * m->cols;
697 
698 	    ret = malloc(n * sizeof *ret);
699 	    if (ret == NULL) {
700 		*err = E_ALLOC;
701 	    } else {
702 		memcpy(ret, m->val, n * sizeof *ret);
703 	    }
704 	}
705     } else if (required) {
706 	gretl_errmsg_sprintf("svm model: required matrix %s was not found", key);
707 	*err = E_DATA;
708     }
709 
710     return ret;
711 }
712 
array_from_bundled_list(gretl_bundle * b,const char * key,int required,int * err)713 static int *array_from_bundled_list (gretl_bundle *b,
714 				     const char *key,
715 				     int required,
716 				     int *err)
717 {
718     int *ret = NULL;
719 
720     if (*err) return NULL;
721 
722     if (gretl_bundle_has_key(b, key)) {
723 	int *list = gretl_bundle_get_list(b, key, err);
724 
725 	if (list != NULL) {
726 	    int n = list[0];
727 
728 	    ret = malloc(n * sizeof *ret);
729 	    if (ret == NULL) {
730 		*err = E_ALLOC;
731 	    } else {
732 		memcpy(ret, list + 1, n * sizeof *ret);
733 	    }
734 	}
735     } else if (required) {
736 	gretl_errmsg_sprintf("svm model: required list %s was not found", key);
737 	*err = E_DATA;
738     }
739 
740     return ret;
741 }
742 
svm_model_from_bundle(gretl_bundle * b,int * err)743 static sv_model *svm_model_from_bundle (gretl_bundle *b,
744 					int *err)
745 {
746     struct svm_parameter *parm;
747     sv_model *model;
748     gretl_matrix *m;
749     int n_elements = 0;
750     int nc = 0, l = 0;
751     int i, j;
752 
753     model = malloc(sizeof *model);
754     if (model == NULL) {
755 	*err = E_ALLOC;
756 	return NULL;
757     }
758 
759     memset(model, 0, sizeof *model);
760     parm = &model->param;
761     *err = set_svm_parm(parm, b, NULL);
762 
763     if (!*err) {
764 	nc = model->nr_class = gretl_bundle_get_int(b, "nr_class", err);
765 	l = model->l = gretl_bundle_get_int(b, "l", err);
766 	n_elements = gretl_bundle_get_int(b, "n_elements", err);
767 	if (nc <= 0 || l <= 0 || n_elements <= 0) {
768 	    *err = E_DATA;
769 	}
770     }
771 
772     model->rho = array_from_bundled_matrix(b, "rho", 1, err);
773 
774     /* not always present */
775     model->label = array_from_bundled_list(b, "label", 0, err);
776     model->probA = array_from_bundled_matrix(b, "probA", 0, err);
777     model->probB = array_from_bundled_matrix(b, "probB", 0, err);
778     model->nSV = array_from_bundled_list(b, "nr_sv", 0, err);
779 
780     /* load the SVs */
781 
782     if (!*err) {
783 	m = gretl_bundle_get_matrix(b, "sv_coef", err);
784 	if (m == NULL) {
785 	    *err = E_DATA;
786 	} else {
787 	    model->sv_coef = doubles_array_new(nc-1, l);
788 	    if (model->sv_coef == NULL) {
789 		*err = E_ALLOC;
790 	    } else {
791 		double *val = m->val;
792 
793 		for (j=0; j<nc-1; j++) {
794 		    memcpy(model->sv_coef[j], val, l * sizeof *val);
795 		    val += l;
796 		}
797 	    }
798 	}
799     }
800 
801     if (!*err) {
802 	sv_cell *p = NULL, *x_space = NULL;
803 	gretl_array *aidx = NULL;
804 	gretl_array *avec = NULL;
805 	gretl_matrix *vec;
806 	int *idx;
807 
808 	model->SV = malloc(l * sizeof *model->SV);
809 	if (model->SV == NULL) {
810 	    *err = E_ALLOC;
811 	} else {
812 	    x_space = malloc(n_elements * sizeof *x_space);
813 	    if (x_space == NULL) {
814 		*err = E_ALLOC;
815 	    } else {
816 		model->SV[0] = p = x_space;
817 	    }
818 	}
819 
820 	if (!*err) {
821 	    aidx = gretl_bundle_get_array(b, "SV_indices", NULL);
822 	    avec = gretl_bundle_get_array(b, "SV_vecs", NULL);
823 	    if (gretl_array_get_type(aidx) != GRETL_TYPE_LISTS ||
824 		gretl_array_get_type(avec) != GRETL_TYPE_MATRICES) {
825 		*err = E_DATA;
826 	    }
827 	}
828 
829 	for (i=0; i<l && !*err; i++) {
830 	    int ni;
831 
832 	    model->SV[i] = p;
833 	    idx = gretl_array_get_element(aidx, i, NULL, err);
834 	    vec = gretl_array_get_element(avec, i, NULL, err);
835 	    ni = idx[0];
836 	    for (j=0; j<ni; j++) {
837 		p[j].index = idx[j+1];
838 		p[j].value = vec->val[j];
839 	    }
840 	    /* add -1 sentinel */
841 	    p[j].index = -1;
842 	    p += ni + 1;
843 	}
844     }
845 
846     if (*err) {
847 	gretl_destroy_svm_model(model);
848 	model = NULL;
849     }
850 
851     return model;
852 }
853 
854 /* can use for testing against svm-scale */
855 
write_ranges(sv_wrapper * w)856 static int write_ranges (sv_wrapper *w)
857 {
858     const char *fname;
859     int libsvm_format = 0;
860     double lo, hi;
861     int i, idx, vi;
862     FILE *fp;
863 
864     fname = gretl_maybe_switch_dir(w->ranges_outfile);
865     fp = gretl_fopen(fname, "wb");
866     if (fp == NULL) {
867 	return E_FOPEN;
868     }
869 
870     if (w->flags & W_STDFMT) {
871 	libsvm_format = 1;
872     }
873 
874     gretl_push_c_numeric_locale();
875 
876     if (libsvm_format) {
877 	fprintf(fp, "x\n%d %d\n",
878 		(int) gretl_matrix_get(w->ranges, 0, 0),
879 		(int) gretl_matrix_get(w->ranges, 0, 1));
880     } else {
881 	fprintf(fp, "x\n%d %d %d\n",
882 		(int) gretl_matrix_get(w->ranges, 0, 0),
883 		(int) gretl_matrix_get(w->ranges, 0, 1),
884 		(int) gretl_matrix_get(w->ranges, 0, 2));
885     }
886 
887     for (i=1; i<w->ranges->rows; i++) {
888 	idx = gretl_matrix_get(w->ranges, i, 0);
889 	lo  = gretl_matrix_get(w->ranges, i, 1);
890 	hi  = gretl_matrix_get(w->ranges, i, 2);
891 	if (libsvm_format) {
892 	    fprintf(fp, "%d %.16g %.16g\n", idx, lo, hi);
893 	} else {
894 	    vi = gretl_matrix_get(w->ranges, i, 3);
895 	    fprintf(fp, "%d %.16g %.16g %d\n", idx, lo, hi, vi);
896 	}
897     }
898 
899     gretl_pop_c_numeric_locale();
900 
901     fclose(fp);
902 
903     return 0;
904 }
905 
read_ranges(sv_wrapper * w)906 static int read_ranges (sv_wrapper *w)
907 {
908     const char *fname;
909     FILE *fp;
910     char line[512];
911     double lo, hi, j;
912     int read_lims = 0;
913     int ncols = 4;
914     int i, vi, idx, n = 0;
915     int err = 0;
916 
917     fname = gretl_maybe_switch_dir(w->ranges_infile);
918     fp = gretl_fopen(fname, "rb");
919     if (fp == NULL) {
920 	return E_FOPEN;
921     }
922 
923     gretl_push_c_numeric_locale();
924 
925     while (fgets(line, sizeof line, fp) && !err) {
926 	if (*line == 'x') {
927 	    read_lims = 1;
928 	    continue;
929 	}
930 	if (read_lims) {
931 	    if (ncols == 3) {
932 		n = sscanf(line, "%lf %lf\n", &lo, &hi);
933 	    } else {
934 		n = sscanf(line, "%lf %lf %lf\n", &lo, &hi, &j);
935 	    }
936 	    if (n != ncols - 1) {
937 		err = E_DATA;
938 	    }
939 	    read_lims = 0;
940 	} else if (!string_is_blank(line)) {
941 	    n++;
942 	}
943     }
944 
945     w->ranges = gretl_matrix_alloc(n+1, ncols);
946     if (w->ranges == NULL) {
947 	err = E_ALLOC;
948     } else {
949 	gretl_matrix_set(w->ranges, 0, 0, lo);
950 	gretl_matrix_set(w->ranges, 0, 1, hi);
951 	gretl_matrix_set(w->ranges, 0, 2, j);
952 	if (ncols == 4) {
953 	    gretl_matrix_set(w->ranges, 0, 3, 0);
954 	}
955 	rewind(fp);
956 	i = 1;
957     }
958 
959     while (fgets(line, sizeof line, fp) && !err) {
960 	if (*line == 'x') {
961 	    if (fgets(line, sizeof line, fp) == NULL) {
962 		err = E_DATA;
963 		break;
964 	    } else {
965 		continue;
966 	    }
967 	}
968 	if (ncols == 3) {
969 	    n = sscanf(line, "%d %lf %lf\n", &idx, &lo, &hi);
970 	} else {
971 	    n = sscanf(line, "%d %lf %lf %d\n", &idx, &lo, &hi, &vi);
972 	}
973 	if (n != ncols) {
974 	    err = E_DATA;
975 	} else {
976 	    gretl_matrix_set(w->ranges, i, 0, idx);
977 	    gretl_matrix_set(w->ranges, i, 1, lo);
978 	    gretl_matrix_set(w->ranges, i, 2, hi);
979 	    if (ncols == 4) {
980 		gretl_matrix_set(w->ranges, i, 3, vi);
981 	    }
982 	    i++;
983 	}
984     }
985 
986     gretl_pop_c_numeric_locale();
987 
988     fclose(fp);
989 
990     return err;
991 }
992 
993 /* can use for testing against svm-scale */
994 
write_problem(sv_data * p,sv_wrapper * w)995 static int write_problem (sv_data *p, sv_wrapper *w)
996 {
997     const char *fname;
998     FILE *fp;
999     int i, t, idx;
1000     double val;
1001 
1002     fname = gretl_maybe_switch_dir(w->data_outfile);
1003     fp = gretl_fopen(fname, "wb");
1004     if (fp == NULL) {
1005 	return E_FOPEN;
1006     }
1007 
1008     gretl_push_c_numeric_locale();
1009 
1010     for (t=0; t<p->l; t++) {
1011 	fprintf(fp, "%g ", p->y[t]);
1012 	for (i=0; i<w->k; i++) {
1013 	    idx = p->x[t][i].index;
1014 	    val = p->x[t][i].value;
1015 	    if (val != 0) {
1016 		fprintf(fp, "%d:%g ", idx, val);
1017 	    }
1018 	}
1019 	fputc('\n', fp);
1020     }
1021 
1022     gretl_pop_c_numeric_locale();
1023 
1024     fclose(fp);
1025 
1026     return 0;
1027 }
1028 
gretl_sv_data_destroy(sv_data * p,sv_cell * x_space)1029 static void gretl_sv_data_destroy (sv_data *p, sv_cell *x_space)
1030 {
1031     if (p != NULL) {
1032 	free(p->y);
1033 	free(p->x);
1034 	free(p);
1035     }
1036     if (x_space != NULL) {
1037 	free(x_space);
1038     }
1039 }
1040 
gretl_sv_data_alloc(int T,int k,sv_cell ** px_space,int * err)1041 static sv_data *gretl_sv_data_alloc (int T, int k,
1042 				     sv_cell **px_space,
1043 				     int *err)
1044 {
1045     sv_data *p = malloc(sizeof *p);
1046 
1047     if (p != NULL) {
1048 	p->l = T;
1049 	p->y = malloc(T * sizeof *p->y);
1050 	p->x = malloc(T * sizeof *p->x);
1051 	if (p->y == NULL || p->x == NULL) {
1052 	    *err = E_ALLOC;
1053 	} else {
1054 	    /* we need an extra cell on each row to hold a
1055 	       sentinel index value of -1
1056 	    */
1057 	    *px_space = malloc(T * (k+1) * sizeof(sv_cell));
1058 	    if (*px_space == NULL) {
1059 		*err = E_ALLOC;
1060 	    }
1061 	}
1062 	if (*err) {
1063 	    gretl_sv_data_destroy(p, NULL);
1064 	    p = NULL;
1065 	}
1066     } else {
1067 	*err = E_ALLOC;
1068     }
1069 
1070     return p;
1071 }
1072 
1073 /* initial discovery of data ranges using the training data */
1074 
get_data_ranges(const int * list,const DATASET * dset,sv_wrapper * w)1075 static int get_data_ranges (const int *list,
1076 			    const DATASET *dset,
1077 			    sv_wrapper *w)
1078 {
1079     const double *x;
1080     double xmin, xmax;
1081     int lmax = list[0];
1082     int i, j, vi;
1083     int err = 0;
1084 
1085     if (w->flags & W_FOLDVAR) {
1086 	/* the last series is not a regressor */
1087 	lmax--;
1088     }
1089 
1090     w->ranges = gretl_matrix_alloc(lmax, 4);
1091     if (w->ranges == NULL) {
1092 	return E_ALLOC;
1093     }
1094 
1095     /* scaling limits */
1096     xmin = w->scaling == 2 ? 0 : -1;
1097     gretl_matrix_set(w->ranges, 0, 0, xmin); /* lower */
1098     gretl_matrix_set(w->ranges, 0, 1, 1);    /* upper */
1099 
1100     /* padding */
1101     gretl_matrix_set(w->ranges, 0, 2, 0);
1102     gretl_matrix_set(w->ranges, 0, 3, 0);
1103 
1104     j = 0;
1105     for (i=2; i<=lmax; i++) {
1106 	vi = list[i];
1107 	x = dset->Z[vi];
1108 	gretl_minmax(dset->t1, dset->t2, x, &xmin, &xmax);
1109 	if (xmin != xmax) {
1110 	    j++;
1111 	    gretl_matrix_set(w->ranges, j, 0, j);
1112 	    gretl_matrix_set(w->ranges, j, 1, xmin);
1113 	    gretl_matrix_set(w->ranges, j, 2, xmax);
1114 	    gretl_matrix_set(w->ranges, j, 3, vi);
1115 	} else {
1116 	    fprintf(stderr, "training data: dropping var %d (%s)\n",
1117 		    vi, dset->varname[vi]);
1118 	}
1119     }
1120 
1121     if (w->flags & W_YSCALE) {
1122 	/* we'll arrange to scale y onto [-1, +1] */
1123 	gretl_minmax(dset->t1, dset->t2, dset->Z[list[1]],
1124 		     &w->ymin, &w->ymax);
1125     }
1126 
1127     /* record number of rows actually occupied, which
1128        could be less than the number allocated
1129     */
1130     gretl_matrix_set(w->ranges, 0, 2, j + 1);
1131 
1132     return err;
1133 }
1134 
1135 /* The following requires a gretl-special "ranges" matrix
1136    with 4 columns, including the dataset IDs of the series.
1137 */
1138 
check_test_data(const DATASET * dset,gretl_matrix * ranges,int k)1139 static int check_test_data (const DATASET *dset,
1140 			    gretl_matrix *ranges,
1141 			    int k)
1142 {
1143     double xmin, xmax;
1144     int i, n, vi;
1145     int err = 0;
1146 
1147     n = 0;
1148     for (i=1; i<=k; i++) {
1149 	vi = gretl_matrix_get(ranges, i, 3);
1150 	gretl_minmax(dset->t1, dset->t2, dset->Z[vi], &xmin, &xmax);
1151 	if (xmin != xmax) {
1152 	    n++;
1153 	} else {
1154 	    fprintf(stderr, "test data: dropping var %d (%s)\n",
1155 		    vi, dset->varname[vi]);
1156 	    /* arrange to exclude this variable by setting the
1157 	       record of its series ID to zero */
1158 	    gretl_matrix_set(ranges, i, 3, 0);
1159 	}
1160     }
1161 
1162     if (n != k) {
1163 	fprintf(stderr, "test data: number of usable variables (%d) "
1164 		"differs from training data (%d)\n", n, k);
1165     } else {
1166 	fprintf(stderr, "test data: number of usable variables "
1167 		"agrees with training data\n");
1168     }
1169 
1170     return err;
1171 }
1172 
1173 /* apply scaling as per the svm-scale binary */
1174 
scale_x(double val,double lo,double hi,double scalemin,double scalemax)1175 static double scale_x (double val, double lo, double hi,
1176 		       double scalemin, double scalemax)
1177 {
1178     if (val == lo) {
1179 	val = scalemin;
1180     } else if (val == hi) {
1181 	val = scalemax;
1182     } else {
1183 	val = scalemin + (scalemax - scalemin) *
1184 	    (val - lo) / (hi - lo);
1185     }
1186 
1187     return val;
1188 }
1189 
scale_y(double y,sv_wrapper * w)1190 static double scale_y (double y, sv_wrapper *w)
1191 {
1192     return -1 + 2 * (y - w->ymin) / (w->ymax - w->ymin);
1193 }
1194 
unscale_y(double y,sv_wrapper * w)1195 static double unscale_y (double y, sv_wrapper *w)
1196 {
1197     return w->ymin + (w->ymax - w->ymin) * (y + 1) / 2.0;
1198 }
1199 
sv_data_fill(sv_data * prob,sv_cell * x_space,sv_wrapper * w,const int * list,const DATASET * dset,int pass)1200 static int sv_data_fill (sv_data *prob,
1201 			 sv_cell *x_space,
1202 			 sv_wrapper *w,
1203 			 const int *list,
1204 			 const DATASET *dset,
1205 			 int pass)
1206 {
1207     double scalemin, scalemax;
1208     double yt, xit, xmin, xmax;
1209     int i, j, s, t, idx;
1210     int vf = 0, pos = 0;
1211     int vi = list[1];
1212     int all_ints = 0;
1213     int k = w->k;
1214 
1215 #if DATA_DEBUG
1216     fprintf(stderr, "sv_data_fill, pass %d\n", pass);
1217 #endif
1218 
1219     /* deal with the LHS variable */
1220     if (pass == 1) {
1221 	if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi]) ||
1222 	    series_is_coded(dset, vi)) {
1223 	    /* classification, not regression */
1224 	    w->auto_type = C_SVC;
1225 	}
1226 	all_ints = 1;
1227     }
1228 
1229     for (i=0, t=dset->t1; t<=dset->t2; t++, i++) {
1230 	yt = dset->Z[vi][t];
1231         if (w->flags & W_YSCALE) {
1232 	    prob->y[i] = scale_y(yt, w);
1233 	} else {
1234 	    prob->y[i] = yt;
1235 	}
1236 	if (all_ints && prob->y[i] != floor(prob->y[i])) {
1237 	    all_ints = 0;
1238 	}
1239     }
1240 
1241     if (pass == 1 && (w->flags & W_FOLDVAR)) {
1242 	w->flist = gretl_list_new(prob->l);
1243 	if (w->flist != NULL) {
1244 	    /* record the ID of the folds series */
1245 	    vf = list[list[0]];
1246 	}
1247     }
1248 
1249     if (pass == 1 && all_ints) {
1250 	w->flags |= W_INTDEP;
1251     }
1252 
1253     /* retrieve the global x-scaling limits */
1254     scalemin = gretl_matrix_get(w->ranges, 0, 0);
1255     scalemax = gretl_matrix_get(w->ranges, 0, 1);
1256 
1257 #if DATA_DEBUG
1258     fprintf(stderr, "scalemin = %g, scalemax = %g\n", scalemin, scalemax);
1259 #endif
1260 
1261     /* write the scaled x-data into the problem struct */
1262     for (s=0, t=dset->t1; t<=dset->t2; t++, s++) {
1263 	if (vf > 0) {
1264 	    /* record the specified fold for this obs */
1265 	    w->flist[s+1] = dset->Z[vf][t];
1266 	}
1267 	prob->x[s] = &x_space[pos];
1268 	j = 0;
1269 	for (i=1; i<=k; i++) {
1270 	    if (w->ranges->cols == 4) {
1271 		vi = (int) gretl_matrix_get(w->ranges, i, 3);
1272 		if (vi <= 0) {
1273 		    /* may happen when we get to the test data */
1274 		    continue;
1275 		}
1276 	    }
1277 	    idx = (int) gretl_matrix_get(w->ranges, i, 0);
1278 	    xmin = gretl_matrix_get(w->ranges, i, 1);
1279 	    xmax = gretl_matrix_get(w->ranges, i, 2);
1280 	    xit = dset->Z[vi][t];
1281 	    if (na(xit)) {
1282 		/* catch missing values */
1283 		fprintf(stderr, "skipping NA for var %d, obs %d\n", vi, t+1);
1284 		xit = 0;
1285 	    } else if (w->scaling != 0) {
1286 		xit = scale_x(xit, xmin, xmax, scalemin, scalemax);
1287 	    }
1288 	    if (xit == 0) {
1289 		/* fprintf(stderr, "skipping a 0 data value (var %d)\n", vi); */
1290 		continue;
1291 	    }
1292 	    prob->x[s][j].index = idx;
1293 	    prob->x[s][j].value = xit;
1294 	    pos++;
1295 	    j++;
1296 	}
1297 	/* end-of-row sentinel */
1298 	prob->x[s][j].index = -1;
1299 	prob->x[s][j].value = 0;
1300 	pos++;
1301     }
1302 
1303     return 0;
1304 }
1305 
1306 /* apparatus for sorting labels into ascending order */
1307 
1308 struct lsort {
1309     int val;
1310     int pos;
1311 };
1312 
ls_compare(const void * a,const void * b)1313 static int ls_compare (const void *a, const void *b)
1314 {
1315     const struct lsort *pa = a;
1316     const struct lsort *pb = b;
1317 
1318     return (pa->val > pb->val) - (pa->val < pb->val);
1319 }
1320 
1321 /* Allocate a matrix (either w->Ptrain or w->Ptest)
1322    into which we'll write the per-outcome probabilities.
1323    Set the sorted labels as column names for this
1324    matrix, and return in location @pls the sorting
1325    info that will enable us to write the probabilities
1326    into the correct corresponding columns.
1327 */
1328 
get_probs_matrix(sv_wrapper * w,sv_model * model,struct lsort ** pls,int training)1329 static gretl_matrix *get_probs_matrix (sv_wrapper *w,
1330 				       sv_model *model,
1331 				       struct lsort **pls,
1332 				       int training)
1333 {
1334     gretl_matrix **targ;
1335     int nc = model->nr_class;
1336     int T, pt1 = 0, pt2 = 0;
1337 
1338     if (training) {
1339 	targ = &w->Ptrain;
1340 	pt1 = w->t1;
1341 	pt2 = w->t2_train;
1342     } else {
1343 	targ = &w->Ptest;
1344 	pt1 = w->t2_train + 1;
1345 	pt2 = w->t2;
1346     }
1347 
1348     T = pt2 - pt1 + 1;
1349     *targ = gretl_matrix_alloc(T, nc);
1350     if (*targ == NULL) {
1351 	return NULL;
1352     }
1353 
1354     gretl_matrix_set_t1(*targ, pt1);
1355     gretl_matrix_set_t2(*targ, pt2);
1356 
1357     if (model->label != NULL) {
1358 	char **S = strings_array_new(nc);
1359 	struct lsort *ls = malloc(nc * sizeof *ls);
1360 
1361 	if (S != NULL && ls != NULL) {
1362 	    char labnum[32];
1363 	    int i;
1364 
1365 	    for (i=0; i<nc; i++) {
1366 		ls[i].val = model->label[i];
1367 		ls[i].pos = i;
1368 	    }
1369 	    qsort(ls, nc, sizeof *ls, ls_compare);
1370 	    for (i=0; i<nc; i++) {
1371 		sprintf(labnum, "%d", ls[i].val);
1372 		S[i] = gretl_strdup(labnum);
1373 	    }
1374 	    gretl_matrix_set_colnames(*targ, S);
1375 	    *pls = ls;
1376 	}
1377     }
1378 
1379     return *targ;
1380 }
1381 
real_svm_predict(double * yhat,sv_data * prob,sv_wrapper * w,sv_model * model,int training,const DATASET * dset,PRN * prn)1382 static int real_svm_predict (double *yhat,
1383 			     sv_data *prob,
1384 			     sv_wrapper *w,
1385 			     sv_model *model,
1386 			     int training,
1387 			     const DATASET *dset,
1388 			     PRN *prn)
1389 {
1390     const char *datastr;
1391     gretl_matrix *P = NULL;
1392     struct lsort *ls = NULL;
1393     int n_correct = 0;
1394     int regression = 0;
1395     int nr_class = 0;
1396     int get_sigma = 0;
1397     double ymean = 0.0;
1398     double TSS = 0.0;
1399     double SSR = 0.0;
1400     double MAD = 0.0;
1401     int misses = 0;
1402     double dev, yhi, yi;
1403     double *pi = NULL;
1404     sv_cell *x;
1405     int i, j, err = 0;
1406 
1407     if (model->param.svm_type == EPSILON_SVR ||
1408 	model->param.svm_type == NU_SVR) {
1409 	regression = 1;
1410 	if (w->flags & W_YSCALE) {
1411 	    for (i=0; i<prob->l; i++) {
1412 		ymean += unscale_y(prob->y[i], w);
1413 	    }
1414 	    ymean /= prob->l;
1415 	} else {
1416 	    ymean = gretl_mean(0, prob->l - 1, prob->y);
1417 	}
1418     }
1419 
1420     if (model->param.probability) {
1421 	if (model->probA == NULL) {
1422 	    fprintf(stderr, "probability requested but no probA!\n");
1423 	    w->do_probs = 0;
1424 	} else if (regression) {
1425 	    get_sigma = training;
1426 	} else {
1427 	    /* classification */
1428 	    nr_class = svm_get_nr_class(model);
1429 	    P = get_probs_matrix(w, model, &ls, training);
1430 	    if (P == NULL) {
1431 		err = E_ALLOC;
1432 	    } else {
1433 		pi = malloc(nr_class * sizeof *pi);
1434 		w->do_probs = 1;
1435 	    }
1436 	}
1437     }
1438 
1439     if (err) {
1440 	return err;
1441     }
1442 
1443     if (w->do_probs) {
1444 	maybe_set_svm_seed(w);
1445     }
1446 
1447     pprintf(prn, "Calling prediction function (this may take a while)\n");
1448     gretl_flush(prn);
1449     for (i=0; i<prob->l; i++) {
1450 	x = prob->x[i];
1451 	if (w->do_probs) {
1452 	    yhi = svm_predict_probability(model, x, pi);
1453 	    for (j=0; j<nr_class; j++) {
1454 		/* transcribe probability estimates */
1455 		if (ls != NULL) {
1456 		    /* re-order the columns */
1457 		    gretl_matrix_set(P, i, j, pi[ls[j].pos]);
1458 		} else {
1459 		    gretl_matrix_set(P, i, j, pi[j]);
1460 		}
1461 	    }
1462 	} else {
1463 	    yhi = svm_predict(model, x);
1464 	}
1465 	yi = prob->y[i];
1466 	if (!regression) {
1467 	    n_correct += (yhi == yi);
1468 	}
1469 	if (w->flags & W_YSCALE) {
1470 	    yhi = unscale_y(yhi, w);
1471 	    yi = unscale_y(yi, w);
1472 	}
1473 	yhat[dset->t1 + i] = yhi;
1474 	if (regression) {
1475 	    /* deviation from mean */
1476 	    dev = yi - ymean;
1477 	    TSS += dev * dev;
1478 	    /* actual minus predicted */
1479 	    dev = yi - yhi;
1480 	    SSR += dev * dev;
1481 	    if (w->regcrit == REG_ROUND_MISS) {
1482 		misses += yi != round(yhi);
1483 	    } else if (w->regcrit == REG_ROUND_MAD) {
1484 		MAD += fabs(yi - round(yhi));
1485 	    } else {
1486 		MAD += fabs(dev);
1487 	    }
1488 	}
1489     }
1490 
1491     if (pi != NULL) {
1492 	free(pi);
1493 	free(ls);
1494     } else if (get_sigma) {
1495 	/* retrieve estimate of Laplace scale */
1496 	w->svr_sigma = svm_get_svr_probability(model);
1497     }
1498 
1499     datastr = training ? "Training data" : "Test data";
1500 
1501     if (regression) {
1502 	MAD /= prob->l;
1503 	if (w->regcrit == REG_ROUND_MISS) {
1504 	    pprintf(prn, "%s: miss ratio = %g (MSE = %g, R^2 = %g)\n", datastr,
1505 		    misses / (double) prob->l, SSR / prob->l, 1.0 - SSR / TSS);
1506 	} else if (w->regcrit > REG_MSE) {
1507 	    pprintf(prn, "%s: MAD = %g (MSE = %g, R^2 = %g)\n", datastr,
1508 		    MAD, SSR / prob->l, 1.0 - SSR / TSS);
1509 	} else {
1510 	    pprintf(prn, "%s: MSE = %g, R^2 = %g (MAD = %g)\n", datastr,
1511 		    SSR / prob->l, 1.0 - SSR / TSS, MAD);
1512 	}
1513     } else {
1514 	pprintf(prn, "%s: correct predictions = %d (%.1f percent)\n", datastr,
1515 		n_correct, 100 * n_correct / (double) prob->l);
1516     }
1517 
1518     return 0;
1519 }
1520 
print_xvalid_iter(sv_parm * parm,sv_wrapper * w,double val,const char * label,int iter,PRN * prn)1521 static void print_xvalid_iter (sv_parm *parm,
1522 			       sv_wrapper *w,
1523 			       double val,
1524 			       const char *label,
1525 			       int iter,
1526 			       PRN *prn)
1527 {
1528     if (iter >= 0) {
1529 	pprintf(prn, "[%d] ", iter + 1);
1530     } else {
1531 	pputs(prn, "\nCross validation:\n ");
1532     }
1533     pprintf(prn, "C = %g", parm->C);
1534     if (uses_gamma(parm)) {
1535 	pprintf(prn, ", gamma = %g", parm->gamma);
1536     }
1537     if (uses_epsilon(parm)) {
1538 	pprintf(prn, ", epsilon = %g", parm->p);
1539     } else if (uses_nu(parm)) {
1540 	pprintf(prn, ", nu = %g", parm->nu);
1541     }
1542     pprintf(prn, ": %s = %#.8g\n", label, val);
1543     gretl_flush(prn);
1544 }
1545 
get_fold_sizes(const sv_data * data,sv_wrapper * w,const DATASET * dset)1546 static int *get_fold_sizes (const sv_data *data,
1547 			    sv_wrapper *w,
1548 			    const DATASET *dset)
1549 {
1550     int *ret = gretl_list_new(w->nfold);
1551     int i, t;
1552 
1553     if (w->flags & W_CONSEC) {
1554 	/* use equally sized consecutive blocks */
1555 	int t2 = w->t2_train > 0 ? w->t2_train : dset->t2;
1556 	int n = t2 - dset->t1 + 1;
1557 	int ni = n / w->nfold;
1558 
1559 	for (i=1; i<=w->nfold; i++) {
1560 	    ret[i] = ni;
1561 	}
1562 	ret[w->nfold] += n % w->nfold;
1563     } else {
1564 	/* use the supplied folds series */
1565 	for (i=1; i<=w->nfold; i++) {
1566 	    ret[i] = 0;
1567 	    for (t=0; t<data->l; t++) {
1568 		if (w->flist[t+1] == i) {
1569 		    ret[i] += 1;
1570 		}
1571 	    }
1572 	}
1573     }
1574 
1575     return ret;
1576 }
1577 
1578 /* Carry out cross validation in the case where the user has provided
1579    a series to specify the "folds", or has specified a given number of
1580    consecutive blocks, as opposed to the default random subsetting.
1581 */
1582 
custom_xvalidate(const sv_data * prob,const sv_parm * parm,const sv_wrapper * w,double * targ)1583 static void custom_xvalidate (const sv_data *prob,
1584 			      const sv_parm *parm,
1585 			      const sv_wrapper *w,
1586 			      double *targ)
1587 {
1588     int i, vi, ni;
1589 
1590     for (i=0; i<w->nfold; i++) {
1591 	struct svm_problem subprob;
1592 	struct svm_model *submodel;
1593 	int jmin = 0, jmax = 0;
1594 	int j, k, useobs;
1595 
1596 	vi = i + 1;
1597 	ni = w->fsize[vi];
1598 	subprob.l = prob->l - ni;
1599 	subprob.x = malloc(subprob.l * sizeof *subprob.x);
1600 	subprob.y = malloc(subprob.l * sizeof *subprob.y);
1601 
1602 	if (w->flags & W_CONSEC) {
1603 	    /* find start and end points for fold */
1604 	    jmin = i * w->fsize[1];
1605 	    jmax = jmin + ni;
1606 	}
1607 
1608 	/* set the training subsample, excluding fold i */
1609 	k = 0;
1610 	for (j=0; j<prob->l; j++) {
1611 	    if (w->flags & W_CONSEC) {
1612 		useobs = j < jmin || j >= jmax;
1613 	    } else {
1614 		useobs = w->flist[j+1] != vi;
1615 	    }
1616 	    if (useobs) {
1617 		subprob.x[k] = prob->x[j];
1618 		subprob.y[k] = prob->y[j];
1619 		k++;
1620 	    }
1621 	}
1622 
1623 	/* train on the given subsample */
1624 	submodel = svm_train(&subprob, parm);
1625 
1626 	/* predict on the complementary subsample (fold i only) */
1627 	if (w->flags & W_CONSEC) {
1628 	    /* we don't have to scan the whole prob->x array */
1629 	    for (j=jmin; j<jmax; j++) {
1630 		targ[j] = svm_predict(submodel, prob->x[j]);
1631 	    }
1632 	} else {
1633 	    /* the values we want may be interspersed */
1634 	    for (j=0; j<prob->l; j++) {
1635 		if (w->flist[j+1] == vi) {
1636 		    targ[j] = svm_predict(submodel, prob->x[j]);
1637 		}
1638 	    }
1639 	}
1640 
1641 	svm_free_and_destroy_model(&submodel);
1642 	free(subprob.x);
1643 	free(subprob.y);
1644     }
1645 }
1646 
1647 /* implement a single cross validation pass */
1648 
xvalidate_once(sv_data * prob,sv_parm * parm,sv_wrapper * w,double * targ,double * crit,int iter,PRN * prn)1649 static int xvalidate_once (sv_data *prob,
1650 			   sv_parm *parm,
1651 			   sv_wrapper *w,
1652 			   double *targ,
1653 			   double *crit,
1654 			   int iter,
1655 			   PRN *prn)
1656 {
1657     int i, n = prob->l;
1658 
1659     if (w->fsize != NULL) {
1660 	custom_xvalidate(prob, parm, w, targ);
1661     } else {
1662 	maybe_set_svm_seed(w);
1663 	svm_cross_validation(prob, parm, w->nfold, targ);
1664     }
1665 
1666     if (doing_regression(parm)) {
1667 	double yi, yhi, dev, minimand = 0;
1668 
1669 	for (i=0; i<prob->l; i++) {
1670 	    yi = prob->y[i];
1671 	    yhi = targ[i];
1672 	    if (w->flags & W_YSCALE) {
1673 		yi = unscale_y(yi, w);
1674 		yhi = unscale_y(yhi, w);
1675 	    }
1676 	    dev = yi - yhi;
1677 	    if (w->regcrit == REG_ROUND_MISS) {
1678 		minimand += (yi != round(yhi));
1679 	    } else if (w->regcrit == REG_ROUND_MAD) {
1680 		minimand += fabs(yi - round(yhi));
1681 	    } else if (w->regcrit == REG_MAD) {
1682 		minimand += fabs(dev);
1683 	    } else {
1684 		minimand += dev * dev;
1685 	    }
1686 	}
1687 	minimand /= n;
1688 	if (prn != NULL) {
1689 	    const char *s = (w->regcrit == REG_MSE)? "MSE" :
1690 		(w->regcrit == REG_ROUND_MISS)? "miss ratio" : "MAD";
1691 
1692 	    print_xvalid_iter(parm, w, minimand, s, iter, prn);
1693 	}
1694 	*crit = -minimand;
1695     } else {
1696 	/* classification */
1697 	double pc_correct = 0;
1698 	int n_correct = 0;
1699 
1700 	for (i=0; i<n; i++) {
1701 	    if (targ[i] == prob->y[i]) {
1702 		n_correct++;
1703 	    }
1704 	}
1705 	pc_correct = 100.0 * n_correct / (double) n;
1706 	if (prn != NULL) {
1707 	    print_xvalid_iter(parm, w, pc_correct,
1708 			      "percent correct", iter, prn);
1709 	}
1710 	*crit = pc_correct;
1711     }
1712 
1713     return 0;
1714 }
1715 
1716 /* For comparison, from libsvm's grid.py:
1717 
1718   -log2c {begin,end,step | "null"} : set the range of c (default -5,15,2)
1719     begin,end,step -- c_range = 2^{begin,...,begin+k*step,...,end}
1720     "null"         -- do not grid with c
1721   -log2g {begin,end,step | "null"} : set the range of g (default 3,-15,-2)
1722     begin,end,step -- g_range = 2^{begin,...,begin+k*step,...,end}
1723     "null"         -- do not with g
1724 */
1725 
grid_set_dimensions(sv_grid * g,const gretl_matrix * m)1726 static int grid_set_dimensions (sv_grid *g, const gretl_matrix *m)
1727 {
1728     int i, linvals = 0;
1729     double x;
1730 
1731     if (m != NULL && m->cols == 4) {
1732 	linvals = 1;
1733     }
1734 
1735     for (i=0; i<=G_p; i++) {
1736 	if ((g->row[i].stop < g->row[i].start && g->row[i].step >= 0) ||
1737 	    (g->row[i].stop > g->row[i].start && g->row[i].step <= 0)) {
1738 	    return E_INVARG;
1739 	}
1740 	g->null[i] = g->n[i] = g->linear[i] = 0;
1741 	if (g->row[i].start == 0 && g->row[i].stop == 0 && g->row[i].step == 0) {
1742 	    /* flag clamping of this row */
1743 	    g->null[i] = g->n[i] = 1;
1744 	} else if (g->row[i].stop >= g->row[i].start) {
1745 	    for (x=g->row[i].start; x<=g->row[i].stop; x+=g->row[i].step) {
1746 		g->n[i] += 1;
1747 	    }
1748 	} else {
1749 	    for (x=g->row[i].start; x>=g->row[i].stop; x+=g->row[i].step) {
1750 		g->n[i] += 1;
1751 	    }
1752 	}
1753 	if (linvals && i < m->rows && gretl_matrix_get(m, i, 3) != 0) {
1754 	    g->linear[i] = 1;
1755 	}
1756     }
1757 
1758     return 0;
1759 }
1760 
sv_grid_default(sv_grid * g,sv_parm * parm)1761 static void sv_grid_default (sv_grid *g, sv_parm *parm)
1762 {
1763     int gsearch = parm == NULL || uses_gamma(parm);
1764     int altgrid = 0;
1765 
1766     if (altgrid) {
1767 	g->row[G_C].start = -2;
1768 	g->row[G_C].stop = 6;
1769 	g->row[G_C].step = 1;
1770     } else {
1771 	g->row[G_C].start = -5;
1772 	g->row[G_C].stop = 8; /* grid.py has 15 (too big?) */
1773 	g->row[G_C].step = 2;
1774     }
1775 
1776     if (gsearch) {
1777 	g->row[G_g].start = 3;
1778 	g->row[G_g].stop = -15;
1779 	g->row[G_g].step = -2;
1780     } else {
1781 	g->row[G_g].start = 0;
1782 	g->row[G_g].stop = 0;
1783 	g->row[G_g].step = 0;
1784     }
1785 
1786     g->row[G_p].start = 0;
1787     g->row[G_p].stop = 0;
1788     g->row[G_p].step = 0;
1789 
1790     grid_set_dimensions(g, NULL);
1791 }
1792 
print_grid(sv_grid * g,sv_parm * parm,PRN * prn)1793 static void print_grid (sv_grid *g, sv_parm *parm, PRN *prn)
1794 {
1795     const char *labels[] = {
1796 	"C",
1797 	"gamma",
1798 	""
1799     };
1800     int i, imax = 2;
1801 
1802     if (!g->null[G_p]) {
1803 	imax = 3;
1804 	labels[2] = uses_nu(parm) ? "nu" : "eps";
1805     }
1806 
1807     pputs(prn, "parameter search grid (start, stop, step):\n");
1808 
1809     for (i=0; i<imax; i++) {
1810 	if (!g->null[i]) {
1811 	    pprintf(prn, " %-8s %g, %g, %g", labels[i],
1812 		    g->row[i].start, g->row[i].stop,
1813 		    g->row[i].step);
1814 	    if (g->n[i] > 1) {
1815 		pprintf(prn, " (%d values, %s)\n", g->n[i],
1816 			g->linear[i] ? "linear" : "log2-based");
1817 	    } else {
1818 		pputc(prn, '\n');
1819 	    }
1820 	}
1821     }
1822     pputc(prn, '\n');
1823 }
1824 
grid_get_C(sv_grid * g,int i)1825 static double grid_get_C (sv_grid *g, int i)
1826 {
1827     double Ci = g->row[G_C].start + i * g->row[G_C].step;
1828 
1829     return g->linear[G_C] ? Ci : pow(2.0, Ci);
1830 }
1831 
grid_get_g(sv_grid * g,int j)1832 static double grid_get_g (sv_grid *g, int j)
1833 {
1834     double gj = g->row[G_g].start + j * g->row[G_g].step;
1835 
1836     return g->linear[G_g] ? gj : pow(2.0, gj);
1837 }
1838 
grid_get_p(sv_grid * g,int k)1839 static double grid_get_p (sv_grid *g, int k)
1840 {
1841     double pk = g->row[G_p].start + k * g->row[G_p].step;
1842 
1843     return g->linear[G_p] ? pk : pow(2.0, pk);
1844 }
1845 
sv_wrapper_add_grid(sv_wrapper * w,sv_parm * parm,const gretl_matrix * m)1846 static int sv_wrapper_add_grid (sv_wrapper *w,
1847 				sv_parm *parm,
1848 				const gretl_matrix *m)
1849 {
1850     sv_grid *g;
1851     int i, err = 0;
1852 
1853     g = calloc(1, sizeof *g);
1854 
1855     if (g == NULL) {
1856 	err = E_ALLOC;
1857     } else if (m != NULL) {
1858 	if (m->rows < 1 || m->cols < 3) {
1859 	    err = E_INVARG;
1860 	} else {
1861 	    for (i=0; i<m->rows; i++) {
1862 		g->row[i].start = gretl_matrix_get(m, i, 0);
1863 		g->row[i].stop  = gretl_matrix_get(m, i, 1);
1864 		g->row[i].step  = gretl_matrix_get(m, i, 2);
1865 	    }
1866 	    err = grid_set_dimensions(g, m);
1867 	}
1868     } else {
1869 	sv_grid_default(g, parm);
1870     }
1871 
1872     if (err) {
1873 	if (g != NULL) {
1874 	    free(g);
1875 	}
1876     } else {
1877 	w->grid = g;
1878     }
1879 
1880     return err;
1881 }
1882 
sv_wrapper_remove_grid(sv_wrapper * w)1883 static void sv_wrapper_remove_grid (sv_wrapper *w)
1884 {
1885     if (w != NULL && w->grid != NULL) {
1886 	free(w->grid);
1887 	w->grid = NULL;
1888     }
1889 }
1890 
write_plot_file(sv_wrapper * w,sv_parm * parm,double cmax)1891 static int write_plot_file (sv_wrapper *w,
1892 			    sv_parm *parm,
1893 			    double cmax)
1894 {
1895     gretl_matrix *m = w->xdata;
1896     const char *zlabel = "MSE";
1897     double xprev, x[3], best[3] = {0};
1898     int critcol = m->cols - 1;
1899     int display, iact = 0;
1900     int i, j, err = 0;
1901     FILE *fp;
1902 
1903     set_optval_string(GNUPLOT, OPT_U, w->plot);
1904 
1905     if (!strcmp(w->plot, "display")) {
1906 	display = iact = 1;
1907     }
1908 
1909     if (gui_mode) {
1910 	fp = open_3d_plot_input_file(&iact);
1911     } else {
1912 	fp = open_plot_input_file(PLOT_USER, 0, &err);
1913     }
1914     if (fp == NULL) {
1915 	err = E_FOPEN;
1916 	return err;
1917     }
1918 
1919     if (!doing_regression(parm)) {
1920 	/* must be classification */
1921 	zlabel = "correct";
1922     }
1923 
1924     gretl_push_c_numeric_locale();
1925 
1926     if (w->grid->linear[G_C]) {
1927 	fputs("set xlabel 'C'\n", fp);
1928     } else {
1929 	fputs("set xlabel 'log2(C)'\n", fp);
1930     }
1931     if (w->grid->linear[G_g]) {
1932 	fputs("set ylabel 'gamma'\n", fp);
1933     } else {
1934 	fputs("set ylabel 'log2(gamma)'\n", fp);
1935     }
1936     fprintf(fp, "set zlabel '%s'\n", zlabel);
1937     fputs("splot '-' using 1:2:3 title '' w pm3d ,\\\n", fp);
1938     fputs(" '-' using 1:2:3 title 'best' w p lt 1 pt 8\n", fp);
1939     xprev = 0;
1940     for (i=0; i<m->rows; i++) {
1941 	x[0] = gretl_matrix_get(m, i, 0);
1942 	x[1] = gretl_matrix_get(m, i, 1);
1943 	x[2] = gretl_matrix_get(m, i, critcol);
1944 	if (x[2] == cmax) {
1945 	    for (j=0; j<3; j++) {
1946 		best[j] = x[j];
1947 	    }
1948 	}
1949 	if (i > 0 && x[0] != xprev) {
1950 	    fputc('\n', fp);
1951 	}
1952 	xprev = x[0];
1953 	fprintf(fp, "%g %g %g\n",
1954 		w->grid->linear[0] ? x[0] : log2(x[0]),
1955 		w->grid->linear[1] ? x[1] : log2(x[1]),
1956 		x[2]);
1957     }
1958     fputs("e\n", fp);
1959     fprintf(fp, "%g %g %g\n",
1960 	    w->grid->linear[0] ? best[0] : log2(best[0]),
1961 	    w->grid->linear[1] ? best[1] : log2(best[1]),
1962 	    best[2]);
1963     fputs("e\n", fp);
1964 
1965     gretl_pop_c_numeric_locale();
1966 
1967     if (gui_mode && display && iact) {
1968 	fputs("pause mouse close\n", fp);
1969 	err = finalize_3d_plot_input_file(fp);
1970 	if (!err && gui_mode) {
1971 	    manufacture_gui_callback(GP_ASYNC);
1972 	}
1973     } else {
1974 	err = finalize_plot_input_file(fp);
1975 	if (!err && gui_mode) {
1976 	    /* wanted? */
1977 	    manufacture_gui_callback(GNUPLOT);
1978 	}
1979     }
1980 
1981     return err;
1982 }
1983 
1984 /* get ready to do parameter search */
1985 
do_search_prep(sv_data * data,sv_parm * parm,sv_wrapper * w,const DATASET * dset,PRN * prn)1986 static int do_search_prep (sv_data *data,
1987 			   sv_parm *parm,
1988 			   sv_wrapper *w,
1989 			   const DATASET *dset,
1990 			   PRN *prn)
1991 {
1992     int err = 0;
1993 
1994     if (0 && parm->kernel_type != RBF) {
1995 	pputs(prn, "Non-RBF kernel, don't know how to tune parameters\n");
1996 	sv_wrapper_remove_grid(w);
1997 	err = E_INVARG;
1998     } else if (w->grid == NULL) {
1999 	sv_wrapper_add_grid(w, parm, NULL);
2000     }
2001 
2002     if (!err && (w->flags & (W_FOLDVAR | W_CONSEC))) {
2003 	w->fsize = get_fold_sizes(data, w, dset);
2004     }
2005 
2006     return err;
2007 }
2008 
maybe_hush(sv_wrapper * w)2009 static void maybe_hush (sv_wrapper *w)
2010 {
2011     if (!(w->flags & W_QUIET)) {
2012 	/* cut out excessive verbosity */
2013 	svm_set_print_string_function(gretl_libsvm_noprint);
2014     }
2015 }
2016 
maybe_resume_printing(sv_wrapper * w)2017 static void maybe_resume_printing (sv_wrapper *w)
2018 {
2019     if (!(w->flags & W_QUIET)) {
2020 	svm_set_print_string_function(gretl_libsvm_print);
2021     }
2022 }
2023 
can_write_plot(sv_wrapper * w)2024 static int can_write_plot (sv_wrapper *w)
2025 {
2026     /* for now we handle only the case of a 2D grid
2027        with C and gamma, but this should be generalized
2028        at some point
2029     */
2030     if (w->xdata == NULL) {
2031 	return 0;
2032     } else if (w->grid->null[G_C] || w->grid->null[G_g]) {
2033 	return 0;
2034     } else if (!w->grid->null[G_p]) {
2035 	return 0;
2036     } else {
2037 	return 1;
2038     }
2039 }
2040 
param_search_finalize(sv_parm * parm,sv_wrapper * w,double cmax,PRN * prn)2041 static void param_search_finalize (sv_parm *parm,
2042 				   sv_wrapper *w,
2043 				   double cmax,
2044 				   PRN *prn)
2045 {
2046     sv_grid *grid = w->grid;
2047 
2048     if (w->plot != NULL && can_write_plot(w)) {
2049 	write_plot_file(w, parm, fabs(cmax));
2050     }
2051 
2052     pprintf(prn, "*** Criterion optimized at %g: C=%g", fabs(cmax), parm->C);
2053     if (uses_gamma(parm)) {
2054 	pprintf(prn, ", gamma=%g", parm->gamma);
2055     }
2056     if (grid->null[G_p]) {
2057 	; /* neither epsilon nor nu used in x-validation */
2058     } else if (uses_epsilon(parm)) {
2059 	pprintf(prn, ", epsilon=%g", parm->p);
2060     } else if (uses_nu(parm)) {
2061 	pprintf(prn, ", nu=%g", parm->nu);
2062     }
2063     pputs(prn, " ***\n");
2064 }
2065 
2066 #ifdef HAVE_MPI
2067 
cross_validate_worker_task(sv_data * data,sv_parm * parm,sv_wrapper * w,double * targ)2068 static int cross_validate_worker_task (sv_data *data,
2069 				       sv_parm *parm,
2070 				       sv_wrapper *w,
2071 				       double *targ)
2072 {
2073     gretl_matrix *task = NULL;
2074     double crit;
2075     int i, j, seq;
2076     int err = 0;
2077 
2078     /* get our sub-task matrix */
2079     task = gretl_matrix_mpi_receive(0, &err);
2080 
2081     /* do the actual cross validation */
2082     for (i=0; i<task->rows && !err; i++) {
2083 	j = 0;
2084 	parm->C     = gretl_matrix_get(task, i, j++);
2085 	parm->gamma = gretl_matrix_get(task, i, j++);
2086 	if (parm->svm_type == EPSILON_SVR) {
2087 	    parm->p = gretl_matrix_get(task, i, j++);
2088 	} else if (parm->svm_type == NU_SVR) {
2089 	    parm->nu = gretl_matrix_get(task, i, j++);
2090 	}
2091 	seq = gretl_matrix_get(task, i, task->cols - 1);
2092 	err = xvalidate_once(data, parm, w, targ, &crit, seq, worker_prn);
2093 	if (!err) {
2094 	    gretl_matrix_set(task, i, j, crit);
2095 	}
2096     }
2097 
2098     /* send results back to root */
2099     if (!err) {
2100 	err = gretl_matrix_mpi_send(task, 0);
2101     }
2102 
2103     /* local clean up */
2104     gretl_matrix_free(task);
2105 
2106     return err;
2107 }
2108 
allocate_submatrices(int n,int cols,int r1,int nr1)2109 static gretl_matrix **allocate_submatrices (int n,
2110 					    int cols,
2111 					    int r1,
2112 					    int nr1)
2113 {
2114     gretl_matrix **M = malloc(n * sizeof *M);
2115     int i, err = 0;
2116 
2117     if (M == NULL) {
2118 	err = E_ALLOC;
2119     } else {
2120 	for (i=0; i<n && !err; i++) {
2121 	    if (i < nr1) {
2122 		M[i] = gretl_matrix_alloc(r1, cols);
2123 	    } else {
2124 		M[i] = gretl_matrix_alloc(r1 + 1, cols);
2125 	    }
2126 	    if (M[i] == NULL) {
2127 		err = E_ALLOC;
2128 	    }
2129 	}
2130     }
2131 
2132     if (err && M != NULL) {
2133 	free(M);
2134 	M = NULL;
2135     }
2136 
2137     return M;
2138 }
2139 
process_results(sv_parm * parm,sv_wrapper * w,gretl_matrix * m,double * pcmax)2140 static int process_results (sv_parm *parm,
2141 			    sv_wrapper *w,
2142 			    gretl_matrix *m,
2143 			    double *pcmax)
2144 {
2145     double crit, cmax = -DBL_MAX;
2146     int i, imax = 0;
2147 
2148     for (i=0; i<m->rows; i++) {
2149 	crit = gretl_matrix_get(m, i, m->cols - 1);
2150 	if (crit > cmax) {
2151 	    cmax = crit;
2152 	    imax = i;
2153 	}
2154 	gretl_matrix_set(m, i, m->cols - 1, fabs(crit));
2155     }
2156 
2157     parm->C = gretl_matrix_get(m, imax, 0);
2158     parm->gamma = gretl_matrix_get(m, imax, 1);
2159     if (!w->grid->null[G_p]) {
2160 	if (parm->svm_type == EPSILON_SVR) {
2161 	    parm->p = gretl_matrix_get(m, imax, 2);
2162 	} else {
2163 	    parm->nu = gretl_matrix_get(m, imax, 2);
2164 	}
2165     }
2166 
2167     /* wrapper takes ownership of @m */
2168     w->xdata = m;
2169 
2170     *pcmax = cmax;
2171 
2172     return 0;
2173 }
2174 
2175 /* The following is called only by the root process,
2176    when doing cross validation via MPI */
2177 
carve_up_xvalidation(sv_data * data,sv_parm * parm,sv_wrapper * w,double * targ,double * pcmax,PRN * prn)2178 static int carve_up_xvalidation (sv_data *data,
2179 				 sv_parm *parm,
2180 				 sv_wrapper *w,
2181 				 double *targ,
2182 				 double *pcmax,
2183 				 PRN *prn)
2184 {
2185     sv_grid *grid = w->grid;
2186     gretl_matrix **M = NULL;
2187     gretl_matrix *m = NULL;
2188     int *row = NULL;
2189     int nC = grid->n[G_C];
2190     int ng = grid->n[G_g];
2191     int np = grid->n[G_p];
2192     double C, g, p, crit;
2193     double *p3 = NULL;
2194     int ncom, nproc = w->nproc;
2195     int i, j, k, ii;
2196     int r1, nr1, seq;
2197     int rem, ncols = 4;
2198     int err = 0;
2199 
2200     if (uses_epsilon(parm)) {
2201 	p = parm->p;
2202 	ncols++;
2203 	if (!grid->null[G_p]) {
2204 	    p3 = &parm->p;
2205 	}
2206     } else if (uses_nu(parm)) {
2207 	p = parm->nu;
2208 	ncols++;
2209 	if (!grid->null[G_p]) {
2210 	    p3 = &parm->nu;
2211 	}
2212     } else {
2213 	p = 0;
2214     }
2215 
2216     ncom = nC * ng * np;  /* number of param combinations */
2217     r1 = ncom / nproc;    /* initial rows per matrix */
2218     rem = ncom % nproc;   /* remainder, if any */
2219     nr1 = nproc - rem;    /* number of matrices with r1 rows */
2220 
2221     if (prn != NULL) {
2222 	pprintf(prn, "MPI: dividing %d parameter specifications "
2223 		"between %d processes\n", ncom, nproc);
2224     }
2225 
2226     /* matrices to store per-worker parameter sets */
2227     M = allocate_submatrices(nproc, ncols, r1, nr1);
2228 
2229     if (M == NULL) {
2230 	err = E_ALLOC;
2231     } else {
2232 	/* array to keep track of which row we're on for
2233 	   each of the submatrices */
2234 	row = malloc(nproc * sizeof *row);
2235 	if (row == NULL) {
2236 	    err = E_ALLOC;
2237 	} else {
2238 	    for (i=0; i<nproc; i++) {
2239 		row[i] = 0;
2240 	    }
2241 	}
2242     }
2243 
2244     if (err) {
2245 	goto bailout;
2246     }
2247 
2248     m = M[nproc-1]; /* convenience pointer to root's submatrix */
2249     C = parm->C;
2250     g = parm->gamma;
2251 
2252     /* We'll dole out the full parameter matrix row by row: this
2253        is an attempt to even up the tasks, given that doing the
2254        cross validation is much more expensive for large values
2255        of C. We add an extra column holding "seq" so that we can
2256        easily put the reassembled matrix into "standard order".
2257     */
2258 
2259     ii = seq = 0;
2260     for (i=0; i<nC; i++) {
2261 	if (!grid->null[G_C]) {
2262 	    C = grid_get_C(grid, i);
2263 	}
2264 	for (j=0; j<ng; j++) {
2265 	    if (!grid->null[G_g]) {
2266 		g = grid_get_g(grid, j);
2267 	    }
2268 	    for (k=0; k<np; k++) {
2269 		if (!grid->null[G_p]) {
2270 		    p = grid_get_p(grid, k);
2271 		}
2272 		if (row[ii] == M[ii]->rows) {
2273 		    /* skip to the larger matrices */
2274 		    ii = nr1;
2275 		}
2276 		gretl_matrix_set(M[ii], row[ii], 0, C);
2277 		gretl_matrix_set(M[ii], row[ii], 1, g);
2278 		if (ncols == 5) {
2279 		    gretl_matrix_set(M[ii], row[ii], 2, p);
2280 		}
2281 		gretl_matrix_set(M[ii], row[ii], ncols - 2, 0);
2282 		gretl_matrix_set(M[ii], row[ii], ncols - 1, seq);
2283 		seq++;
2284 		row[ii] += 1;
2285 		if (ii == nproc - 1) {
2286 		    ii = 0;
2287 		} else {
2288 		    ii++;
2289 		}
2290 	    }
2291 	}
2292     }
2293 
2294     /* send matrices to workers, keeping the last for root */
2295     for (i=0; i<nproc-1; i++) {
2296 	gretl_matrix_mpi_send(M[i], i+1);
2297 	gretl_matrix_free(M[i]);
2298 	M[i] = NULL;
2299     }
2300 
2301     maybe_hush(w);
2302 
2303     /* do root's share of the cross validation */
2304     for (i=0; i<m->rows && !err; i++) {
2305 	parm->C = gretl_matrix_get(m, i, 0);
2306 	parm->gamma = gretl_matrix_get(m, i, 1);
2307 	if (!grid->null[G_p]) {
2308 	    *p3 = gretl_matrix_get(m, i, 2);
2309 	}
2310 	seq = gretl_matrix_get(m, i, ncols-1);
2311 	err = xvalidate_once(data, parm, w, targ, &crit, seq, prn);
2312 	if (!err) {
2313 	    gretl_matrix_set(m, i, ncols-2, crit);
2314 	}
2315     }
2316 
2317     maybe_resume_printing(w);
2318 
2319     /* get results back from workers and process */
2320     if (!err) {
2321 	gretl_matrix *Tmp, *Res = NULL;
2322 	gretl_matrix *mi;
2323 	int row = 0;
2324 
2325 	Tmp = gretl_matrix_alloc(ncom, 5);
2326 	if (Tmp == NULL) {
2327 	    err = E_ALLOC;
2328 	}
2329      	for (i=1; i<nproc && !err; i++) {
2330 	    mi = gretl_matrix_mpi_receive(i, &err);
2331 	    if (mi != NULL) {
2332 		gretl_matrix_inscribe_matrix(Tmp, mi, row, 0, GRETL_MOD_NONE);
2333 		row += mi->rows;
2334 		gretl_matrix_free(mi);
2335 	    }
2336 	}
2337 	if (!err) {
2338 	    gretl_matrix_inscribe_matrix(Tmp, m, row, 0, GRETL_MOD_NONE);
2339 	    Res = gretl_matrix_sort_by_column(Tmp, ncols - 1, &err);
2340 	}
2341 	gretl_matrix_free(Tmp);
2342 	if (Res != NULL) {
2343 	    /* hide the sequence column */
2344 	    gretl_matrix_reuse(Res, -1, ncols - 1);
2345 	    err = process_results(parm, w, Res, pcmax);
2346 	}
2347     }
2348 
2349  bailout:
2350 
2351     free(M);
2352     free(row);
2353 
2354     return err;
2355 }
2356 
call_mpi_cross_validation(sv_data * data,sv_parm * parm,sv_wrapper * w,PRN * prn)2357 static int call_mpi_cross_validation (sv_data *data,
2358 				      sv_parm *parm,
2359 				      sv_wrapper *w,
2360 				      PRN *prn)
2361 {
2362     double cmax = -DBL_MAX;
2363     double *targ;
2364     int err = 0;
2365 
2366     targ = malloc(data->l * sizeof *targ);
2367     if (targ == NULL) {
2368 	return E_ALLOC;
2369     }
2370 
2371     /* arrange to get rand() in sync across the processes */
2372     gretl_mpi_bcast(&w->seed, GRETL_TYPE_UNSIGNED, 0);
2373 
2374     if (w->rank == 0) {
2375 	if (prn != NULL) {
2376 	    print_grid(w->grid, parm, prn);
2377 	}
2378 	err = carve_up_xvalidation(data, parm, w, targ, &cmax, prn);
2379     } else {
2380 	err = cross_validate_worker_task(data, parm, w, targ);
2381     }
2382 
2383     if (w->rank == 0) {
2384 	param_search_finalize(parm, w, cmax, prn);
2385     }
2386 
2387     free(targ);
2388 
2389     return err;
2390 }
2391 
2392 #endif /* HAVE_MPI */
2393 
call_cross_validation(sv_data * data,sv_parm * parm,sv_wrapper * w,double * targ,PRN * prn)2394 static int call_cross_validation (sv_data *data,
2395 				  sv_parm *parm,
2396 				  sv_wrapper *w,
2397 				  double *targ,
2398 				  PRN *prn)
2399 {
2400     double crit;
2401     int err = 0;
2402 
2403     pputs(prn, "Cross-validation ");
2404     if (w->flags & W_CONSEC) {
2405 	pprintf(prn, "using %d consecutive blocks", w->nfold);
2406     } else if (w->flags & W_FOLDVAR) {
2407 	pprintf(prn, "using %d values of %s", w->nfold, w->foldname);
2408     } else {
2409 	pprintf(prn, "using %d random folds", w->nfold);
2410     }
2411     pputs(prn, " (may take a while)\n");
2412     gretl_flush(prn);
2413 
2414     if (w->grid != NULL) {
2415 	sv_grid *grid = w->grid;
2416 	double cmax = -DBL_MAX;
2417 	double *p3 = NULL;
2418 	int nC = grid->n[G_C];
2419 	int ng = grid->n[G_g];
2420 	int np = grid->n[G_p];
2421 	int ibest = 0, jbest = 0, kbest = 0;
2422 	int iter = 0;
2423 	int ncols = 3;
2424 	int i, j, k;
2425 
2426 	if (prn != NULL) {
2427 	    print_grid(grid, parm, prn);
2428 	}
2429 
2430 	if (uses_epsilon(parm)) {
2431 	    ncols++;
2432 	    p3 = &parm->p;
2433 	} else if (uses_nu(parm)) {
2434 	    ncols++;
2435 	    p3 = &parm->nu;
2436 	}
2437 
2438 	if ((w->plot != NULL || (w->flags & W_SVPARM)) && (nC > 1 || ng > 1)) {
2439 	    w->xdata = gretl_matrix_alloc(nC * ng * np, ncols);
2440 	}
2441 
2442 	maybe_hush(w);
2443 
2444 	for (i=0; i<nC; i++) {
2445 	    if (!grid->null[G_C]) {
2446 		parm->C = grid_get_C(grid, i);
2447 	    }
2448 	    for (j=0; j<ng; j++) {
2449 		if (!grid->null[G_g]) {
2450 		    parm->gamma = grid_get_g(grid, j);
2451 		}
2452 		for (k=0; k<np; k++) {
2453 		    if (!grid->null[G_p]) {
2454 			*p3 = grid_get_p(grid, k);
2455 		    }
2456 		    xvalidate_once(data, parm, w, targ, &crit, iter, prn);
2457 		    if (crit > cmax) {
2458 			cmax = crit;
2459 			ibest = i;
2460 			jbest = j;
2461 			kbest = k;
2462 		    }
2463 		    if (w->xdata != NULL) {
2464 			gretl_matrix_set(w->xdata, iter, 0, parm->C);
2465 			gretl_matrix_set(w->xdata, iter, 1, parm->gamma);
2466 			if (p3 != NULL) {
2467 			    gretl_matrix_set(w->xdata, iter, 2, *p3);
2468 			}
2469 			gretl_matrix_set(w->xdata, iter, ncols - 1, fabs(crit));
2470 		    }
2471 		    iter++;
2472 		}
2473 	    }
2474 	}
2475 
2476 	maybe_resume_printing(w);
2477 
2478 	if (!grid->null[G_C]) {
2479 	    parm->C = grid_get_C(grid, ibest);
2480 	}
2481 	if (!grid->null[G_g]) {
2482 	    parm->gamma = grid_get_g(grid, jbest);
2483 	}
2484 	if (!grid->null[G_p]) {
2485 	    *p3 = grid_get_p(grid, kbest);
2486 	}
2487 	param_search_finalize(parm, w, cmax, prn);
2488     } else {
2489 	/* no search: just a single cross validation pass
2490 	   FIXME save the criterion to optional bundle arg?
2491 	*/
2492 	err = xvalidate_once(data, parm, w, targ, &crit, -1, prn);
2493     }
2494 
2495     return err;
2496 }
2497 
2498 /* If the caller provided a "folds" series, check it for
2499    validity: the values must be consecutive integers
2500    starting at 1, and the number of folds must be within
2501    bounds.
2502 */
2503 
check_folds_series(const int * list,const DATASET * dset,sv_wrapper * w,PRN * prn)2504 static int check_folds_series (const int *list,
2505 			       const DATASET *dset,
2506 			       sv_wrapper *w,
2507 			       PRN *prn)
2508 {
2509     gretl_matrix *fvec;
2510     const double *x;
2511     int v = list[list[0]];
2512     int i, t2, n;
2513     int err = 0;
2514 
2515     x = dset->Z[v] + dset->t1;
2516     t2 = w->t2_train > 0 ? w->t2_train : dset->t2;
2517     n = t2 - dset->t1 + 1;
2518 
2519     fvec = gretl_matrix_values(x, n, OPT_S, &err);
2520     if (!err) {
2521 	n = gretl_vector_get_length(fvec);
2522 	x = fvec->val;
2523 
2524 	for (i=0; i<n && !err; i++) {
2525 	    if (i > 0) {
2526 		if (x[i] != x[i-1] + 1) {
2527 		    fputs("foldvar must hold consecutive integers\n", stderr);
2528 		    err = E_DATA;
2529 		}
2530 	    } else if (x[i] != 1.0) {
2531 		fputs("foldvar values must start at 1\n", stderr);
2532 		err = E_DATA;
2533 	    }
2534 	}
2535 
2536 	if (!err) {
2537 	    int nf = x[n-1];
2538 
2539 	    pprintf(prn, "%s: found %d folds\n", dset->varname[v], nf);
2540 	    if (nf < 2 || (w->nfold > 0 && nf != w->nfold)) {
2541 		fprintf(stderr, "invalid number of folds %d\n", nf);
2542 		err = E_DATA;
2543 	    } else {
2544 		strcpy(w->foldname, dset->varname[v]);
2545 		w->nfold = nf;
2546 	    }
2547 	}
2548     }
2549 
2550     gretl_matrix_free(fvec);
2551 
2552     return err;
2553 }
2554 
2555 /* It's OK if there's no @key value in bundle @b, but if it
2556    is present it should hold an integer value. Return 1
2557    if key found and integer retrieved OK, else 0.
2558 */
2559 
get_optional_int(gretl_bundle * b,const char * key,int * ival,int * err)2560 static int get_optional_int (gretl_bundle *b, const char *key,
2561 			     int *ival, int *err)
2562 {
2563     GretlType type = 0;
2564     void *ptr;
2565 
2566     ptr = gretl_bundle_get_data(b, key, &type, NULL, NULL);
2567 
2568     if (ptr != NULL) {
2569 	if (type == GRETL_TYPE_INT) {
2570 	    *ival = *(int *) ptr;
2571 	    return 1;
2572 	} else if (type == GRETL_TYPE_DOUBLE) {
2573 	    *ival = *(double *) ptr;
2574 	    return 1;
2575 	} else if (type == GRETL_TYPE_UNSIGNED) {
2576 	    *ival = *(unsigned *) ptr;
2577 	    return 1;
2578 	} else if (err != NULL) {
2579 	    *err = E_TYPES;
2580 	}
2581     }
2582 
2583     return 0;
2584 }
2585 
get_optional_unsigned(gretl_bundle * b,const char * key,unsigned int * uval,int * err)2586 static int get_optional_unsigned (gretl_bundle *b, const char *key,
2587 				  unsigned int *uval, int *err)
2588 {
2589     GretlType type = 0;
2590     void *ptr;
2591 
2592     ptr = gretl_bundle_get_data(b, key, &type, NULL, NULL);
2593 
2594     if (ptr != NULL) {
2595 	if (type == GRETL_TYPE_INT) {
2596 	    *uval = *(int *) ptr;
2597 	    return 1;
2598 	} else if (type == GRETL_TYPE_DOUBLE) {
2599 	    *uval = *(double *) ptr;
2600 	    return 1;
2601 	} else if (type == GRETL_TYPE_UNSIGNED) {
2602 	    *uval = *(unsigned *) ptr;
2603 	    return 1;
2604 	} else {
2605 	    *err = E_TYPES;
2606 	}
2607     }
2608 
2609     return 0;
2610 }
2611 
2612 /* Determine if @s is a recognized parameter key: we
2613    do this so we can flag anything that may be a
2614    mistyped key.
2615 */
2616 
is_w_parm(const char * s)2617 static int is_w_parm (const char *s)
2618 {
2619     const char *wparms[] = {
2620 	"loadmod", "scaling", "predict", "n_train",
2621 	"folds", "seed", "quiet", "search",
2622 	"foldvar", "consecutive", "yscale",
2623 	"search_only", "grid", "ranges_outfile",
2624 	"data_outfile", "ranges_infile", "model_outfile",
2625 	"model_infile", "plot", "range_format", "refold",
2626 	"autoseed", "regcrit", "use_mpi", NULL
2627     };
2628     int i;
2629 
2630     for (i=0; wparms[i] != NULL; i++) {
2631 	if (!strcmp(s, wparms[i])) {
2632 	    return 1;
2633 	}
2634     }
2635 
2636     return 0;
2637 }
2638 
check_user_params(gretl_array * A)2639 static int check_user_params (gretl_array *A)
2640 {
2641     char **pstrs;
2642     int i, np, err = 0;
2643 
2644     pstrs = gretl_array_get_strings(A, &np);
2645     if (pstrs == NULL) {
2646 	return 0; /* ? */
2647     }
2648 
2649     for (i=0; i<np; i++) {
2650 	if (!is_w_parm(pstrs[i]) && !is_sv_parm(pstrs[i])) {
2651 	    gretl_errmsg_sprintf("svm: unrecognized parameter '%s'",
2652 				 pstrs[i]);
2653 	    err = E_BADOPT;
2654 	    break;
2655 	}
2656     }
2657 
2658     return err;
2659 }
2660 
2661 /* process @bparm, which contains the parameters provided by
2662    the caller */
2663 
read_params_bundle(gretl_bundle * bparm,gretl_bundle * bmod,sv_wrapper * wrap,sv_parm * parm,const int * list,const DATASET * dset,PRN * prn)2664 static int read_params_bundle (gretl_bundle *bparm,
2665 			       gretl_bundle *bmod,
2666 			       sv_wrapper *wrap,
2667 			       sv_parm *parm,
2668 			       const int *list,
2669 			       const DATASET *dset,
2670 			       PRN *prn)
2671 {
2672     gretl_array *A;
2673     int no_savemod = 0;
2674     unsigned int uval;
2675     int ival, err = 0;
2676 
2677     /* got any bad keys in @bparm? */
2678     A = gretl_bundle_get_keys(bparm, NULL);
2679     if (A != NULL) {
2680 	err = check_user_params(A);
2681 	gretl_array_destroy(A);
2682 	if (err) {
2683 	    return err;
2684 	}
2685     }
2686 
2687     /* start by reading some info that's not included in
2688        the libsvm @parm struct
2689     */
2690 
2691     if (get_optional_int(bparm, "loadmod", &ival, &err)) {
2692 	if (ival != 0 && bmod == NULL) {
2693 	    gretl_errmsg_sprintf("svm: invalid 'loadmod' value %d\n", ival);
2694 	    err = E_INVARG;
2695 	} else if (ival != 0) {
2696 	    wrap->flags |= W_LOADMOD;
2697 	    no_savemod = 1;
2698 	}
2699     }
2700 
2701     if (get_optional_int(bparm, "scaling", &ival, &err)) {
2702 	if (ival < 0 || ival > 2) {
2703 	    gretl_errmsg_sprintf("svm: invalid 'scaling' value %d\n", ival);
2704 	    err = E_INVARG;
2705 	} else {
2706 	    wrap->scaling = ival;
2707 	}
2708     }
2709 
2710     if (get_optional_int(bparm, "predict", &ival, &err)) {
2711 	if (ival < 0 || ival > 2) {
2712 	    gretl_errmsg_sprintf("svm: invalid 'predict' value %d\n", ival);
2713 	    err = E_INVARG;
2714 	} else {
2715 	    wrap->predict = ival;
2716 	}
2717     }
2718 
2719     if (get_optional_int(bparm, "n_train", &ival, &err)) {
2720 	/* number of training observations: this sets a range
2721 	   starting at the first complete observation in the
2722 	   incoming sample, which was recorded as wrap->t1
2723 	   after trimming any missing values
2724 	*/
2725 	if (ival == 0) {
2726 	    wrap->flags |= W_NOTRAIN;
2727 	    pputs(prn, "n_train = 0, no training will be done\n");
2728 	} else {
2729 	    int nmax = wrap->t2 - wrap->t1 + 1;
2730 
2731 	    if (ival < list[0]) {
2732 		gretl_errmsg_sprintf("svm: n_train must be at least %d", list[0]);
2733 		err = E_INVARG;
2734 	    } else if (ival > nmax) {
2735 		gretl_errmsg_sprintf("svm: n_train cannot exceed the number of "
2736 				     "complete observations, %d", nmax);
2737 		err = E_INVARG;
2738 	    } else {
2739 		char obs1[OBSLEN], obs2[OBSLEN];
2740 
2741 		wrap->t2_train = wrap->t1 + ival - 1;
2742 		get_obs_string(obs1, wrap->t1, dset);
2743 		get_obs_string(obs2, wrap->t2_train, dset);
2744 		pprintf(prn, "n_train = %d; use observations %s to %s for training\n",
2745 			ival, obs1, obs2);
2746 	    }
2747 	}
2748     }
2749 
2750     if (get_optional_int(bparm, "folds", &ival, &err)) {
2751 	if (ival < 2) {
2752 	    gretl_errmsg_sprintf("svm: invalid 'folds' value %d\n", ival);
2753 	    err = E_INVARG;
2754 	} else {
2755 	    wrap->nfold = ival;
2756 	}
2757     }
2758 
2759     if (get_optional_unsigned(bparm, "seed", &uval, &err)) {
2760 	wrap->seed = uval;
2761     }
2762 
2763     if (get_optional_int(bparm, "refold", &ival, &err) && ival != 0) {
2764 	wrap->flags |= W_REFOLD;
2765     }
2766 
2767     if (get_optional_int(bparm, "quiet", &ival, &err) && ival != 0) {
2768 	wrap->flags |= W_QUIET;
2769     }
2770 
2771     if (get_optional_int(bparm, "search", &ival, &err) && ival != 0) {
2772 	wrap->flags |= W_SEARCH;
2773     }
2774 
2775     if (get_optional_int(bparm, "use_mpi", &ival, &err) && ival != 0) {
2776 	wrap->flags |= W_MPI;
2777     }
2778 
2779     if (get_optional_int(bparm, "foldvar", &ival, &err) && ival != 0) {
2780 	wrap->flags |= W_FOLDVAR;
2781     }
2782 
2783     if (get_optional_int(bparm, "consecutive", &ival, &err) && ival != 0) {
2784 	wrap->flags |= W_CONSEC;
2785     }
2786 
2787     if (get_optional_int(bparm, "yscale", &ival, &err) && ival != 0) {
2788 	wrap->flags |= W_YSCALE;
2789     }
2790 
2791     if (get_optional_int(bparm, "search_only", &ival, &err) && ival != 0) {
2792 	wrap->flags |= W_SEARCH;
2793 	if (bmod != NULL) {
2794 	    wrap->flags |= W_SVPARM;
2795 	    no_savemod = 1;
2796 	}
2797     }
2798 
2799     if (get_optional_int(bparm, "regcrit", &ival, &err) && ival != 0) {
2800 	if (ival < REG_MSE || ival > REG_ROUND_MISS) {
2801 	    gretl_errmsg_sprintf("svm: invalid regcrit value %d", ival);
2802 	    err = E_INVARG;
2803 	} else {
2804 	    wrap->regcrit = ival;
2805 	}
2806     }
2807 
2808     if (!err) {
2809 	const gretl_matrix *m;
2810 
2811 	m = gretl_bundle_get_matrix(bparm, "grid", NULL);
2812 	if (m != NULL) {
2813 	    err = sv_wrapper_add_grid(wrap, NULL, m);
2814 	    if (!err) {
2815 		wrap->flags |= W_SEARCH;
2816 	    }
2817 	}
2818     }
2819 
2820     if (!err) {
2821 	const char *strval;
2822 
2823 	strval = gretl_bundle_get_string(bparm, "ranges_outfile", NULL);
2824 	if (strval != NULL && *strval != '\0') {
2825 	    wrap->ranges_outfile = gretl_strdup(strval);
2826 	}
2827 	strval = gretl_bundle_get_string(bparm, "data_outfile", NULL);
2828 	if (strval != NULL && *strval != '\0') {
2829 	    wrap->data_outfile = gretl_strdup(strval);
2830 	}
2831 	strval = gretl_bundle_get_string(bparm, "ranges_infile", NULL);
2832 	if (strval != NULL && *strval != '\0') {
2833 	    wrap->ranges_infile = gretl_strdup(strval);
2834 	}
2835 	strval = gretl_bundle_get_string(bparm, "model_outfile", NULL);
2836 	if (strval != NULL && *strval != '\0') {
2837 	    wrap->model_outfile = gretl_strdup(strval);
2838 	}
2839 	strval = gretl_bundle_get_string(bparm, "model_infile", NULL);
2840 	if (strval != NULL && *strval != '\0') {
2841 	    wrap->model_infile = gretl_strdup(strval);
2842 	}
2843 	strval = gretl_bundle_get_string(bparm, "plot", NULL);
2844 	if (strval != NULL && *strval != '\0') {
2845 	    wrap->plot = gretl_strdup(strval);
2846 	}
2847 	strval = gretl_bundle_get_string(bparm, "range_format", NULL);
2848 	if (strval != NULL && !strcmp(strval, "libsvm")) {
2849 	    wrap->flags |= W_STDFMT;
2850 	}
2851     }
2852 
2853     if (!err) {
2854 	if (bmod != NULL && !no_savemod) {
2855 	    /* implicitly, a model should be saved to @bmod */
2856 	    wrap->flags |= W_SAVEMOD;
2857 	}
2858 	if ((wrap->flags & W_SEARCH) && !(wrap->flags & W_FOLDVAR) &&
2859 	    wrap->nfold == 0) {
2860 	    /* default to 5 folds */
2861 	    wrap->nfold = 5;
2862 	}
2863     }
2864 
2865     if (!err && (wrap->flags & W_FOLDVAR)) {
2866 	if (wrap->flags & W_CONSEC) {
2867 	    gretl_errmsg_set("The foldvar and consecutive options "
2868 			     "are incompatible");
2869 	    err = E_BADOPT;
2870 	} else {
2871 	    err = check_folds_series(list, dset, wrap, prn);
2872 	}
2873     }
2874 
2875     if (!err) {
2876 	/* if we're still OK, fill out the libsvm @parm struct */
2877 	err = set_svm_parm(parm, bparm, prn);
2878     }
2879 
2880 #if 0 /* FIXME? */
2881     if (!err) {
2882 	/* param search: at present we only support the RBF kernel */
2883 	if ((wrap->flags & W_SEARCH) && parm->kernel_type != RBF) {
2884 	    gretl_errmsg_set("parameter search is only supported for "
2885 			     "the RBF kernel at present");
2886 	    err = E_INVARG;
2887 	}
2888     }
2889 #endif
2890 
2891     return err;
2892 }
2893 
2894 /* check in advance if the incoming list variable is supposed to
2895    hold a fold variable in last place */
2896 
peek_foldvar(gretl_bundle * bparm,int * fvar)2897 static int peek_foldvar (gretl_bundle *bparm, int *fvar)
2898 {
2899     int ival, err = 0;
2900 
2901     if (get_optional_int(bparm, "foldvar", &ival, &err) && ival != 0) {
2902 	*fvar = 1;
2903     }
2904 
2905     return err;
2906 }
2907 
2908 #ifdef HAVE_MPI
2909 
2910 /* Here we're trying to tell if we should invoke automated
2911    local-machine MPI -- having already ascertained that it's
2912    feasible in general terms.
2913 */
2914 
peek_use_mpi(gretl_bundle * bparm,gretl_bundle * bmodel,gretl_bundle * bprob,PRN * prn)2915 static int peek_use_mpi (gretl_bundle *bparm,
2916 			 gretl_bundle *bmodel,
2917 			 gretl_bundle *bprob,
2918 			 PRN *prn)
2919 {
2920     int ival = 0, ret = 0;
2921 
2922     if (bmodel != NULL || bprob != NULL) {
2923 	/* these cases are not handled yet */
2924 	return 0;
2925     }
2926 
2927     if (get_optional_int(bparm, "use_mpi", &ival, NULL)) {
2928 	/* if the user specified @use_mpi, respect it provisionally */
2929 	ret = ival;
2930     } else {
2931 	/* otherwise default to using MPI subject to the check below? */
2932 	ret = 1;
2933     }
2934 
2935     if (ret > 0) {
2936 	/* MPI is actually used internally only in parameter search,
2937 	   so let's peek at that.
2938 	*/
2939 	int search = 0;
2940 
2941 	ival = 0;
2942 	if (get_optional_int(bparm, "search", &ival, NULL) && ival > 0) {
2943 	    search = 1;
2944 	} else if (gretl_bundle_get_matrix(bparm, "grid", NULL) != NULL) {
2945 	    search = 1;
2946 	}
2947 	if (!search && ival > 0) {
2948 	    pputs(prn, "svm: 'use_mpi' option ignored in the absence of search");
2949 	    pputc(prn, '\n');
2950 	    ret = 0;
2951 	}
2952     }
2953 
2954     return ret;
2955 }
2956 
2957 #endif
2958 
2959 /* wrap a couple of libsvm I/0 functions so they respect
2960    @workdir
2961 */
2962 
svm_load_model_wrapper(const char * fname,int * err)2963 static sv_model *svm_load_model_wrapper (const char *fname,
2964 					 int *err)
2965 {
2966     sv_model *model = NULL;
2967 
2968     fname = gretl_maybe_switch_dir(fname);
2969     model = svm_load_model(fname);
2970 
2971     if (model == NULL) {
2972 	*err = E_FOPEN;
2973     }
2974 
2975     return model;
2976 }
2977 
svm_save_model_wrapper(const char * fname,const sv_model * model)2978 static int svm_save_model_wrapper (const char *fname,
2979 				   const sv_model *model)
2980 {
2981     int err = 0;
2982 
2983     fname = gretl_maybe_switch_dir(fname);
2984     err = svm_save_model(fname, model);
2985 
2986     return err ? E_FOPEN : 0;
2987 }
2988 
report_result(int err,PRN * prn)2989 static void report_result (int err, PRN *prn)
2990 {
2991     if (prn != NULL) {
2992 	if (err) {
2993 	    pprintf(prn, "err = %d\n", err);
2994 	} else {
2995 	    pputs(prn, "OK\n");
2996 	}
2997     }
2998 }
2999 
get_svm_ranges(const int * list,DATASET * dset,sv_wrapper * wrap,PRN * prn)3000 static int get_svm_ranges (const int *list,
3001 			   DATASET *dset,
3002 			   sv_wrapper *wrap,
3003 			   PRN *prn)
3004 {
3005     int err = 0;
3006 
3007     if (wrap->ranges_infile != NULL) {
3008 	pprintf(prn, "Getting data ranges from %s... ",
3009 		wrap->ranges_infile);
3010 	err = read_ranges(wrap);
3011 	report_result(err, prn);
3012 	gretl_flush(prn);
3013     } else {
3014 	pprintf(prn, "Getting data ranges (sample = %d to %d)... ",
3015 		dset->t1 + 1, dset->t2 + 1);
3016 	err = get_data_ranges(list, dset, wrap);
3017 	report_result(err, prn);
3018 	if (!err && wrap->ranges_outfile != NULL) {
3019 	    err = write_ranges(wrap);
3020 	    report_result(err, prn);
3021 	}
3022 	gretl_flush(prn);
3023     }
3024 
3025     return err;
3026 }
3027 
3028 /* load an svm model, either from a bundle in memory
3029    or a text file in libsvm format */
3030 
do_load_model(sv_wrapper * w,gretl_bundle * b,PRN * prn,int * err)3031 static sv_model *do_load_model (sv_wrapper *w,
3032 				gretl_bundle *b,
3033 				PRN *prn,
3034 				int *err)
3035 {
3036     sv_model *model = NULL;
3037 
3038     if (w->flags & W_LOADMOD) {
3039 	pputs(prn, "Loading svm model from bundle... ");
3040 	model = svm_model_from_bundle(b, err);
3041     } else {
3042 	pprintf(prn, "Loading svm model from %s... ", w->model_infile);
3043 	model = svm_load_model_wrapper(w->model_infile, err);
3044     }
3045 
3046     return model;
3047 }
3048 
3049 /* save an svm model, either to a bundle in memory
3050    or a text file in libsvm format */
3051 
do_save_model(sv_model * model,sv_wrapper * w,gretl_bundle * b,PRN * prn)3052 static int do_save_model (sv_model *model, sv_wrapper *w,
3053 			  gretl_bundle *b, PRN *prn)
3054 {
3055     int err = 0;
3056 
3057     if (w->flags & W_SAVEMOD) {
3058 	pputs(prn, "Saving svm model to bundle... ");
3059 	err = svm_model_save_to_bundle(model, b);
3060     }
3061     if (w->model_outfile != NULL) {
3062 	pprintf(prn, "Saving svm model as %s... ", w->model_outfile);
3063 	err = svm_save_model_wrapper(w->model_outfile, model);
3064     }
3065 
3066     return err;
3067 }
3068 
3069 /* here we call the libsvm parameter-checking
3070    function; if all is well and we're not in quiet
3071    mode, we print some information on the primary
3072    parameters
3073 */
3074 
check_svm_params(sv_data * data,sv_parm * parm,sv_wrapper * w,PRN * prn)3075 static int check_svm_params (sv_data *data,
3076 			     sv_parm *parm,
3077 			     sv_wrapper *w,
3078 			     PRN *prn)
3079 {
3080     const char *msg = svm_check_parameter(data, parm);
3081     int prob_ok;
3082     int err = 0;
3083 
3084     prob_ok = parm->svm_type == C_SVC ||
3085 	parm->svm_type == NU_SVC ||
3086 	parm->svm_type == EPSILON_SVR ||
3087 	parm->svm_type == NU_SVR;
3088 
3089     pputs(prn, "Checking parameter values... ");
3090     if (msg != NULL) {
3091 	pputs(prn, "problem\n");
3092 	gretl_errmsg_sprintf("svm: %s", msg);
3093 	err = E_INVARG;
3094     } else if (parm->probability && !prob_ok) {
3095 	pputs(prn, "problem\n");
3096 	gretl_errmsg_set("svm: probability estimates not supported "
3097 			 "for this specification");
3098 	err = E_INVARG;
3099     } else if (!doing_regression(parm) && w->regcrit > 0) {
3100 	pputs(prn, "problem\n");
3101 	gretl_errmsg_sprintf("svm: cross validation criterion %d not applicable",
3102 			     w->regcrit);
3103 	err = E_INVARG;
3104     } else if (w->regcrit > 2 && !(w->flags & W_INTDEP)) {
3105 	pputs(prn, "problem\n");
3106 	gretl_errmsg_sprintf("svm: cross validation criterion %d not applicable",
3107 			     w->regcrit);
3108 	err = E_INVARG;
3109     } else if (prn != NULL) {
3110 	pputs(prn, "OK\n");
3111 	pprintf(prn, "svm_type %s, kernel_type %s\n",
3112 		svm_type_names[parm->svm_type],
3113 		kernel_type_names[parm->kernel_type]);
3114 	pprintf(prn, "initial params: C = %g", parm->C);
3115 	if (uses_gamma(parm)) {
3116 	    pprintf(prn, ", gamma = %g", parm->gamma);
3117 	}
3118 	if (uses_epsilon(parm)) {
3119 	    pprintf(prn, ", epsilon = %g", parm->p);
3120 	}
3121 	if (uses_nu(parm)) {
3122 	    pprintf(prn, ", nu = %g", parm->nu);
3123 	}
3124 	pputs(prn, "\n\n");
3125     }
3126 
3127     if (!err && doing_regression(parm) && w->regcrit == 0) {
3128 	w->regcrit = REG_MSE;
3129     }
3130 
3131     return err;
3132 }
3133 
3134 /* note: if we're in auto-MPI mode, only the rank 0 process
3135    executes this function
3136 */
3137 
svm_predict_main(const int * list,gretl_bundle * bmodel,double * yhat,int * yhat_written,DATASET * dset,PRN * prn,sv_parm * parm,sv_wrapper * wrap,sv_data * prob1)3138 static int svm_predict_main (const int *list,
3139 			     gretl_bundle *bmodel,
3140 			     double *yhat,
3141 			     int *yhat_written,
3142 			     DATASET *dset,
3143 			     PRN *prn,
3144 			     sv_parm *parm,
3145 			     sv_wrapper *wrap,
3146 			     sv_data *prob1)
3147 {
3148     sv_data *prob2 = NULL;
3149     sv_cell *x_space2 = NULL;
3150     sv_model *model = NULL;
3151     int do_training = 1;
3152     int err = 0;
3153 
3154     gui_mode = gretl_in_gui_mode();
3155 
3156     if (!err && wrap->data_outfile != NULL && wrap->rank <= 0) {
3157 	err = write_problem(prob1, wrap);
3158 	report_result(err, prn);
3159     }
3160 
3161     if (wrap->flags & W_NOTRAIN) {
3162 	do_training = 0;
3163     }
3164 
3165     if (!err && loading_model(wrap)) {
3166 	do_training = 0;
3167 	model = do_load_model(wrap, bmodel, prn, &err);
3168 	report_result(err, prn);
3169     }
3170 
3171     if (!err && wrap->nfold > 0 && model == NULL) {
3172 #ifdef HAVE_MPI
3173 	if (wrap->nproc > 1) {
3174 	    err = call_mpi_cross_validation(prob1, parm, wrap, prn);
3175 	} else {
3176 	    err = call_cross_validation(prob1, parm, wrap, yhat + dset->t1,
3177 					prn);
3178 	    if (!err) {
3179 		*yhat_written = 1;
3180 	    }
3181 	}
3182 #else
3183 	err = call_cross_validation(prob1, parm, wrap, yhat + dset->t1, prn);
3184 	if (!err) {
3185 	    *yhat_written = 1;
3186 	}
3187 #endif
3188 	if (wrap->flags & W_SEARCH) {
3189 	    /* continue to train using tuned params? */
3190 	    if (wrap->flags & W_SVPARM) {
3191 		/* no, just save the tuned parms */
3192 		save_results_to_bundle(parm, wrap, bmodel);
3193 		wrap->predict = do_training = 0;
3194 	    }
3195 	} else {
3196 	    /* not searching, we're done */
3197 	    wrap->predict = do_training = 0;
3198 	}
3199     }
3200 
3201 #if defined(HAVE_MPI) && defined(_OPENMP)
3202     if (wrap->nproc > 1) {
3203 	/* we're running more than one process, but from this point
3204 	   only rank 0 is working, so we can assign more than one OMP
3205 	   thread to it without worrying about contention?
3206 	*/
3207 	int nc = gretl_n_physical_cores();
3208 
3209 	omp_set_num_threads(nc);
3210     }
3211 #endif
3212 
3213     if (!err && do_training) {
3214 	pputs(prn, "Calling training function (this may take a while)\n");
3215 	gretl_flush(prn);
3216 	if (wrap->do_probs) {
3217 	    maybe_set_svm_seed(wrap);
3218 	}
3219 	model = svm_train(prob1, parm);
3220 	if (model == NULL) {
3221 	    err = E_DATA;
3222 	}
3223 	pprintf(prn, "Training done, err = %d\n", err);
3224 	gretl_flush(prn);
3225     }
3226 
3227     if (model != NULL && saving_model(wrap)) {
3228 	err = do_save_model(model, wrap, bmodel, prn);
3229 	report_result(err, prn);
3230     }
3231 
3232     if (!err && wrap->predict > 0) {
3233 	/* Now do some prediction */
3234 	int training = (wrap->t2_train > 0);
3235 	int T_os = -1;
3236 
3237 	err = real_svm_predict(yhat, prob1, wrap, model, training, dset, prn);
3238 	if (err) {
3239 	    goto bailout;
3240 	}
3241 	*yhat_written = 1;
3242 	dset->t2 = wrap->t2;
3243 	if (training && wrap->predict > 1) {
3244 	    T_os = dset->t2 - wrap->t2_train;
3245 	}
3246 	if (T_os >= 10) {
3247 	    /* If we have some out-of-sample data, go
3248 	       ahead and predict out of sample.
3249 	    */
3250 	    int T;
3251 
3252 	    dset->t1 = wrap->t2_train + 1;
3253 	    T = sample_size(dset);
3254 	    pprintf(prn, "Found %d testing observations\n", T);
3255 	    err = check_test_data(dset, wrap->ranges, wrap->k);
3256 	    if (!err) {
3257 		prob2 = gretl_sv_data_alloc(T, wrap->k, &x_space2, &err);
3258 	    }
3259 	    if (!err) {
3260 		sv_data_fill(prob2, x_space2, wrap, list, dset, 2);
3261 		err = real_svm_predict(yhat, prob2, wrap, model, 0, dset, prn);
3262 	    }
3263 	}
3264     }
3265 
3266  bailout:
3267 
3268     gretl_sv_data_destroy(prob2, x_space2);
3269     if (wrap->flags & W_LOADMOD) {
3270 	gretl_destroy_svm_model(model);
3271     } else {
3272 	svm_free_and_destroy_model(&model);
3273     }
3274     svm_prn = NULL;
3275 
3276     return err;
3277 }
3278 
sv_trim_missing(int * list,int fvar,DATASET * dset)3279 static int sv_trim_missing (int *list, int fvar, DATASET *dset)
3280 {
3281     int t1 = dset->t1;
3282     int t2 = dset->t2;
3283     int T, nmiss = 0;
3284     int err = 0;
3285 
3286     if (fvar) {
3287 	/* Temporary adjustment, since we don't require that
3288 	   the folds variable (in last place) has valid values
3289 	   at _all_ observations, but only in the training set;
3290 	   and we'll check for that later.
3291 	*/
3292 	list[0] -= 1;
3293     }
3294 
3295     list_adjust_sample(list, &t1, &t2, dset, &nmiss);
3296     T = t2 - t1 + 1 - nmiss;
3297 
3298     if (fvar) {
3299 	/* reset to the original value */
3300 	list[0] += 1;
3301     }
3302 
3303     if (T < sample_size(dset)) {
3304 	/* we can't handle NAs for the dependent variable within the
3305 	   (possibly reduced) sample range
3306 	*/
3307 	int t, yno = list[1];
3308 
3309 	for (t=t1; t<=t2 && !err; t++) {
3310 	    if (na(dset->Z[yno][t])) {
3311 		gretl_errmsg_sprintf("Dependent variable is NA at obs %d", t+1);
3312 		err = E_MISSDATA;
3313 	    }
3314 	}
3315 	if (!err) {
3316 	    fprintf(stderr, "sv_trim_missing: t1=%d, t2=%d, excluding %d observations\n",
3317 		    t1+1, t2+1, sample_size(dset) - T);
3318 	}
3319     }
3320 
3321     if (!err) {
3322 	if (T > list[0]) {
3323 	    dset->t1 = t1;
3324 	    dset->t2 = t2;
3325 	} else {
3326 	    err = E_MISSDATA;
3327 	}
3328     }
3329 
3330     return err;
3331 }
3332 
3333 /* If w->seed was automatic, and was actually used, save it
3334    to the incoming bundle in case it may be of interest.
3335 */
3336 
maybe_save_auto_seed(sv_wrapper * w,gretl_bundle * b)3337 static void maybe_save_auto_seed (sv_wrapper *w, gretl_bundle *b)
3338 {
3339     if (gretl_bundle_has_key(b, "seed")) {
3340 	return; /* no, an incoming seed was specified */
3341     } else if (gretl_bundle_has_key(b, "foldvar") ||
3342 	       gretl_bundle_has_key(b, "consecutive")) {
3343 	return; /* no, randomization of folds not employed */
3344     } else if (gretl_bundle_has_key(b, "folds") ||
3345 	       gretl_bundle_has_key(b, "search") ||
3346 	       gretl_bundle_has_key(b, "grid")) {
3347 	/* OK, randomized folds employed */
3348 	gretl_bundle_set_unsigned(b, "autoseed", w->seed);
3349     }
3350 }
3351 
3352 #ifdef HAVE_MPI
3353 
forward_to_gretlmpi(const int * list,gretl_bundle * bparams,DATASET * dset,int np,double * yhat,int * got_yhat,PRN * prn)3354 static int forward_to_gretlmpi (const int *list,
3355 				gretl_bundle *bparams,
3356 				DATASET *dset,
3357 				int np,
3358 				double *yhat,
3359 				int *got_yhat,
3360 				PRN *prn)
3361 {
3362     gchar *fname;
3363     int err = 0;
3364 
3365     /* write data file for gretlmpi */
3366     fname = gretl_make_dotpath("svmtmp.gdt");
3367     err = write_data(fname, (int *) list, dset, OPT_NONE, NULL);
3368     g_free(fname);
3369 
3370     if (!err) {
3371 	/* write parameter bundle for gretlmpi */
3372 	err = gretl_bundle_write_to_file(bparams, "svmtmp.xml", 1);
3373     }
3374 
3375     if (!err) {
3376 	/* compose and execute MPI script */
3377 	if (np > 1) {
3378 	    set_optval_int(MPI, OPT_N, np);
3379 	    err = foreign_start(MPI, NULL, OPT_N, prn);
3380 	} else {
3381 	    err = foreign_start(MPI, NULL, OPT_NONE, prn);
3382 	}
3383 	if (!err) {
3384 	    foreign_append("open @dotdir/svmtmp.gdt -q", MPI);
3385 	    foreign_append("bundle parms = bread(\"@dotdir/svmtmp.xml\")", MPI);
3386 	    foreign_append("list All = dataset", MPI);
3387 	    foreign_append("yhat_svm = svm(All, parms)", MPI);
3388 	    foreign_append("if $mpirank < 1", MPI);
3389 	    foreign_append("  mwrite({yhat_svm}, \"@dotdir/svmtmp.mat\")", MPI);
3390 	    foreign_append("endif", MPI);
3391 	    err = foreign_execute(dset, OPT_L, prn); /* note: --local */
3392 	}
3393     }
3394 
3395     if (!err) {
3396 	gretl_matrix *m;
3397 	int s, t;
3398 
3399 	m = gretl_matrix_read_from_file("svmtmp.mat", 1, &err);
3400 	if (m == NULL) {
3401 	    err = E_DATA;
3402 	} else {
3403 	    if (m->rows == sample_size(dset)) {
3404 		for (t=dset->t1, s=0; t<=dset->t2; t++, s++) {
3405 		    yhat[t] = m->val[s];
3406 		}
3407 		*got_yhat = 1;
3408 	    } else {
3409 		err = E_DATA;
3410 	    }
3411 	    gretl_matrix_free(m);
3412 	}
3413     }
3414 
3415     return err;
3416 }
3417 
3418 #endif
3419 
gretl_svm_driver(const int * list,gretl_bundle * bparams,gretl_bundle * bmodel,gretl_bundle * bprob,double * yhat,int * yhat_written,DATASET * dset,PRN * inprn)3420 int gretl_svm_driver (const int *list,
3421 		      gretl_bundle *bparams,
3422 		      gretl_bundle *bmodel,
3423 		      gretl_bundle *bprob,
3424 		      double *yhat,
3425 		      int *yhat_written,
3426 		      DATASET *dset,
3427 		      PRN *inprn)
3428 {
3429     sv_parm parm;
3430     sv_wrapper wrap;
3431     sv_data *prob = NULL;
3432     sv_cell *x_space = NULL;
3433     PRN *prn = inprn;
3434     int save_t1 = dset->t1;
3435     int save_t2 = dset->t2;
3436     int fvar = 0, lmin = 2;
3437     int T, err = 0;
3438 
3439 #ifdef HAVE_MPI
3440     if (gretl_mpi_rank() > 0) {
3441 	/* cut out chatter from ranks other than 0 */
3442 	prn = NULL;
3443     } else if (auto_mpi_ok()) {
3444 	int np = peek_use_mpi(bparams, bmodel, bprob, prn);
3445 
3446 	if (np > 0) {
3447 	    return forward_to_gretlmpi(list, bparams, dset, np,
3448 				       yhat, yhat_written, prn);
3449 	}
3450     }
3451 #endif
3452 
3453     /* look ahead to see if we're supposed to be getting
3454        a "folds" variable at the end of @list */
3455     err = peek_foldvar(bparams, &fvar);
3456     if (err) {
3457 	return err;
3458     } else if (fvar) {
3459 	lmin = 3;
3460     }
3461 
3462     /* general checking and initialization */
3463     if (list == NULL || list[0] < lmin) {
3464 	gretl_errmsg_set("svm: invalid list argument");
3465 	err = E_INVARG;
3466     } else {
3467 	/* adjust the sample range for any NAs fore or aft */
3468 	err = sv_trim_missing((int *) list, fvar, dset);
3469 	if (!err) {
3470 	    sv_wrapper_init(&wrap, dset);
3471 	    err = read_params_bundle(bparams, bmodel, &wrap, &parm,
3472 				     list, dset, prn);
3473 	}
3474     }
3475 
3476     if (err) {
3477 	/* restore incoming sample and get out */
3478 	dset->t1 = save_t1;
3479 	dset->t2 = save_t2;
3480 	return err;
3481     }
3482 
3483 #ifdef HAVE_MPI
3484     wrap.nproc = gretl_mpi_n_processes();
3485     if (wrap.nproc > 1) {
3486 	/* we're actually in MPI mode */
3487 	wrap.rank = gretl_mpi_rank();
3488 	worker_prn = NULL;
3489 	if (wrap.rank > 0) {
3490 	    /* let rank 0 do (almost) all the talking */
3491 	    if (!(wrap.flags & W_QUIET)) {
3492 		worker_prn = inprn;
3493 		wrap.flags |= W_QUIET;
3494 	    }
3495 	}
3496     }
3497 #endif
3498 
3499     if (wrap.flags & W_QUIET) {
3500 	svm_set_print_string_function(gretl_libsvm_noprint);
3501 	prn = NULL;
3502     } else {
3503 	svm_prn = prn;
3504 	svm_set_print_string_function(gretl_libsvm_print);
3505     }
3506 
3507     if (wrap.t2_train > 0) {
3508 	/* adjust sample temporarily */
3509 	dset->t2 = wrap.t2_train;
3510     }
3511     T = sample_size(dset);
3512 
3513     err = get_svm_ranges(list, dset, &wrap, prn);
3514 
3515     if (!err) {
3516 	wrap.k = (int) gretl_matrix_get(wrap.ranges, 0, 2) - 1;
3517 	pputs(prn, "Allocating problem space... ");
3518 	prob = gretl_sv_data_alloc(T, wrap.k, &x_space, &err);
3519 	report_result(err, prn);
3520     }
3521     gretl_flush(prn);
3522 
3523     if (!err) {
3524 	/* fill out the "problem" data */
3525 	pputs(prn, "Scaling and transcribing data... ");
3526 	sv_data_fill(prob, x_space, &wrap, list, dset, 1);
3527 	if (parm.svm_type < 0) {
3528 	    parm.svm_type = wrap.auto_type;
3529 	}
3530 	if (parm.gamma == 0) {
3531 	    parm.gamma = 1.0 / wrap.k;
3532 	}
3533 	pputs(prn, "OK\n");
3534 	/* we're now ready to run a full check on @parm */
3535 	err = check_svm_params(prob, &parm, &wrap, prn);
3536 	if (!err && parm.probability) {
3537 	    wrap.do_probs = 1;
3538 	}
3539     }
3540 
3541     if (!err && (wrap.flags & W_SEARCH)) {
3542 	err = do_search_prep(prob, &parm, &wrap, dset, prn);
3543     }
3544 
3545     if (err) {
3546 	goto getout;
3547     }
3548 
3549     if (wrap.nproc < 2 || !(wrap.flags & W_SEARCH)) {
3550 	/* not doing parameter search via MPI */
3551 	err = svm_predict_main(list, bmodel,
3552 			       yhat, yhat_written,
3553 			       dset, prn, &parm,
3554 			       &wrap, prob);
3555 	goto getout;
3556     }
3557 
3558 #ifdef HAVE_MPI
3559     if (wrap.rank == 0) {
3560 	err = svm_predict_main(list, bmodel,
3561 			       yhat, yhat_written,
3562 			       dset, prn, &parm,
3563 			       &wrap, prob);
3564     } else {
3565 	err = call_mpi_cross_validation(prob, &parm, &wrap, NULL);
3566     }
3567 #endif
3568 
3569  getout:
3570 
3571     if (!err && bprob != NULL) {
3572 	save_probs_to_bundle(&wrap, bprob);
3573     }
3574 
3575     if (!err && bparams != NULL) {
3576 	maybe_save_auto_seed(&wrap, bparams);
3577     }
3578 
3579     gretl_sv_data_destroy(prob, x_space);
3580     svm_destroy_param(&parm);
3581     sv_wrapper_free(&wrap);
3582 
3583     dset->t1 = save_t1;
3584     dset->t2 = save_t2;
3585 
3586     return err;
3587 }
3588