1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "version.h"
22 #include "nonparam.h"
23 
24 #define KDEBUG 0
25 
26 /* For discussion of kernel density estimation see Davidson and
27    MacKinnon, Econometric Theory and Methods, Section 15.5.
28 */
29 
30 #define ROOT5  2.23606797749979     /* sqrt(5) */
31 #define EPMULT 0.3354101966249685   /* 3 over (4 * sqrt(5)) */
32 
33 enum {
34     GAUSSIAN_KERNEL,
35     EPANECHNIKOV_KERNEL
36 };
37 
38 typedef struct kernel_info_ kernel_info;
39 
40 struct kernel_info_ {
41     int type;        /* Gaussian or Epanechnikov */
42     double *x;       /* single data array */
43     gretl_matrix *X; /* for multiple data */
44     int n;           /* number of elements in x */
45     int kn;          /* number of points to use */
46     double h;        /* single bandwidth */
47     double *hvec;    /* multiple bandwidths */
48     double xmin;
49     double xmax;
50     double xstep;
51 };
52 
ep_pdf(double z)53 static double ep_pdf (double z)
54 {
55     if (fabs(z) >= ROOT5) {
56 	return 0.0;
57     } else {
58 	return EPMULT * (1.0 - z * z / 5.0);
59     }
60 }
61 
kernel(kernel_info * kinfo,double x0,int j)62 static double kernel (kernel_info *kinfo, double x0, int j)
63 {
64     double h, den = 0.0;
65     int in_range = 0;
66     int i;
67 
68     if (kinfo->hvec != NULL) {
69 	h = kinfo->hvec[j];
70     } else {
71 	h = kinfo->h;
72     }
73 
74     for (i=0; i<kinfo->n; i++) {
75 	double z = (x0 - kinfo->x[i]) / h;
76 
77 	if (kinfo->type == GAUSSIAN_KERNEL) {
78 	    den += normal_pdf(z);
79 	} else {
80 	    double dt = ep_pdf(z);
81 
82 	    if (!in_range && dt > 0) {
83 		in_range = 1;
84 	    } else if (in_range && dt == 0.0) {
85 		break;
86 	    }
87 
88 	    den += ep_pdf(z);
89 	}
90     }
91 
92     den /= h * kinfo->n;
93 
94     return den;
95 }
96 
density_plot(kernel_info * kinfo,const char * vname)97 static int density_plot (kernel_info *kinfo, const char *vname)
98 {
99     FILE *fp;
100     gchar *tmp = NULL;
101     double xt, xdt;
102     int t, err = 0;
103 
104     fp = open_plot_input_file(PLOT_KERNEL, 0, &err);
105     if (err) {
106 	return err;
107     }
108 
109     gretl_push_c_numeric_locale();
110 
111     fputs("set nokey\n", fp);
112     fprintf(fp, "set xrange [%g:%g]\n", kinfo->xmin, kinfo->xmax);
113 
114     fputs("# literal lines = 2\n", fp);
115     fprintf(fp, "set label \"%s\" at graph .65, graph .97\n",
116 	    (kinfo->type == GAUSSIAN_KERNEL)? _("Gaussian kernel") :
117 	    _("Epanechnikov kernel"));
118     tmp = g_strdup_printf(_("bandwidth = %g"), kinfo->h);
119     fprintf(fp, "set label \"%s\" at graph .65, graph .93\n", tmp);
120     g_free(tmp);
121 
122     tmp = g_strdup_printf(_("Estimated density of %s"), vname);
123     fprintf(fp, "set title \"%s\"\n", tmp);
124     g_free(tmp);
125 
126     fputs("plot \\\n'-' using 1:2 w lines\n", fp);
127 
128     xt = kinfo->xmin;
129     for (t=0; t<=kinfo->kn; t++) {
130 	xdt = kernel(kinfo, xt, 0);
131 	fprintf(fp, "%g %g\n", xt, xdt);
132 	xt += kinfo->xstep;
133     }
134     fputs("e\n", fp);
135 
136     gretl_pop_c_numeric_locale();
137 
138     return finalize_plot_input_file(fp);
139 }
140 
density_matrix(kernel_info * kinfo,int * err)141 static gretl_matrix *density_matrix (kernel_info *kinfo,
142 				     int *err)
143 {
144     gretl_matrix *m;
145     double xt, xdt;
146     int t;
147 
148     m = gretl_matrix_alloc(kinfo->kn + 1, 2);
149     if (m == NULL) {
150 	*err = E_ALLOC;
151 	return NULL;
152     }
153 
154     xt = kinfo->xmin;
155     for (t=0; t<=kinfo->kn; t++) {
156 	xdt = kernel(kinfo, xt, 0);
157 	gretl_matrix_set(m, t, 0, xt);
158 	gretl_matrix_set(m, t, 1, xdt);
159 	xt += kinfo->xstep;
160     }
161 
162     return m;
163 }
164 
multi_density_matrix(kernel_info * kinfo,int * err)165 static gretl_matrix *multi_density_matrix (kernel_info *kinfo,
166 					   int *err)
167 {
168     gretl_matrix *m;
169     double xt, xdtj;
170     int nc = kinfo->X->cols;
171     int t, j;
172 
173     m = gretl_matrix_alloc(kinfo->kn + 1, 1 + nc);
174     if (m == NULL) {
175 	*err = E_ALLOC;
176 	return NULL;
177     }
178 
179     xt = kinfo->xmin;
180     for (t=0; t<=kinfo->kn; t++) {
181 	gretl_matrix_set(m, t, 0, xt);
182 	for (j=0; j<nc; j++) {
183 	    kinfo->x = kinfo->X->val + j * kinfo->n;
184 	    xdtj = kernel(kinfo, xt, j);
185 	    gretl_matrix_set(m, t, j+1, xdtj);
186 	}
187 	xt += kinfo->xstep;
188     }
189 
190     return m;
191 }
192 
kernel_xmin_xmax(kernel_info * kinfo)193 static int kernel_xmin_xmax (kernel_info *kinfo)
194 {
195     const double *x = kinfo->x;
196     double xbar, sdx, xm4, xp4;
197     int err, n = kinfo->n;
198 
199     err = gretl_moments(0, n - 1, kinfo->x, NULL,
200 			&xbar, &sdx, NULL, NULL, 1);
201     if (err) {
202 	return err;
203     }
204 
205     xm4 = xbar - 4.0 * sdx;
206     xp4 = xbar + 4.0 * sdx;
207 
208     if (xp4 > x[n-1]) {
209 	kinfo->xmax = xp4;
210     } else {
211 	kinfo->xmax = x[n-1];
212     }
213 
214     if (xm4 < x[0]) {
215 	kinfo->xmin = xm4;
216     } else {
217 	kinfo->xmin = x[0];
218     }
219 
220     if (kinfo->xmin < 0.0 && x[0] >= 0.0) {
221 	/* if data are non-negative, don't set a negative min */
222 	kinfo->xmin = x[0];
223     }
224 
225     kinfo->xstep = (kinfo->xmax - kinfo->xmin) / kinfo->kn;
226 
227     return 0;
228 }
229 
kernel_kn(int nobs)230 static int kernel_kn (int nobs)
231 {
232     if (nobs >= 1000) {
233 	return 1000;
234     } else if (nobs >= 200) {
235 	return 200;
236     } else if (nobs >= 100) {
237 	return 100;
238     } else {
239 	return 50;
240     }
241 }
242 
set_kernel_params(kernel_info * kinfo,double bwscale,gretlopt opt)243 static int set_kernel_params (kernel_info *kinfo,
244 			      double bwscale,
245 			      gretlopt opt)
246 {
247     double bw = kernel_bandwidth(kinfo->x, kinfo->n);
248     int err = 0;
249 
250     kinfo->h = bwscale * bw;
251 
252     if (kinfo->h <= 0.0) {
253 	return E_DATA;
254     }
255 
256     /* number of points to use */
257     kinfo->kn = kernel_kn(kinfo->n);
258 
259     /* range to use */
260     err = kernel_xmin_xmax(kinfo);
261 
262     kinfo->type = (opt & OPT_O)? EPANECHNIKOV_KERNEL :
263 	GAUSSIAN_KERNEL;
264 
265     return err;
266 }
267 
268 #define MINOBS 30
269 
get_sorted_x(const double * y,int * pn,int * err)270 static double *get_sorted_x (const double *y, int *pn, int *err)
271 {
272     double *x = malloc(*pn * sizeof *x);
273     int i, n = 0;
274 
275     if (x == NULL) {
276 	*err = E_ALLOC;
277 	return NULL;
278     }
279 
280     for (i=0; i<*pn; i++) {
281 	if (!na(y[i])) {
282 	    x[n++] = y[i];
283 	}
284     }
285 
286     if (n < MINOBS) {
287 	*err = E_TOOFEW;
288 	free(x);
289 	x = NULL;
290     } else {
291 	qsort(x, n, sizeof *x, gretl_compare_doubles);
292 	*pn = n;
293     }
294 
295     return x;
296 }
297 
298 int
kernel_density(const double * y,int n,double bwscale,const char * label,gretlopt opt)299 kernel_density (const double *y, int n, double bwscale,
300 		const char *label, gretlopt opt)
301 {
302     kernel_info kinfo = {0};
303     int err = 0;
304 
305     kinfo.n = n;
306     kinfo.x = get_sorted_x(y, &kinfo.n, &err);
307     if (err) {
308 	return err;
309     }
310 
311     err = set_kernel_params(&kinfo, bwscale, opt);
312 
313     if (!err) {
314 	err = density_plot(&kinfo, label);
315     }
316 
317     free(kinfo.x);
318 
319     return err;
320 }
321 
322 gretl_matrix *
multiple_kd_matrix(const gretl_matrix * X,double bwscale,gretlopt opt,int * err)323 multiple_kd_matrix (const gretl_matrix *X, double bwscale,
324 		    gretlopt opt, int *err)
325 {
326     double Xmin = 0, Xmax = 0;
327     double bw, *xi = NULL;
328     gretl_matrix *m = NULL;
329     kernel_info kinfo = {0};
330     int j;
331 
332     kinfo.n = X->rows;
333     if (kinfo.n < MINOBS) {
334 	*err = E_TOOFEW;
335 	return NULL;
336     }
337 
338     kinfo.X = gretl_matrix_copy(X);
339     if (kinfo.X == NULL) {
340 	*err = E_ALLOC;
341 	return NULL;
342     }
343 
344     kinfo.hvec = malloc(X->cols * sizeof *kinfo.hvec);
345     if (kinfo.hvec == NULL) {
346 	*err = E_ALLOC;
347 	gretl_matrix_free(kinfo.X);
348 	return NULL;
349     }
350 
351     /* sort Xs per column, get bandwidths and extrema */
352     for (j=0; j<X->cols; j++) {
353 	xi = kinfo.X->val + kinfo.n * j;
354 	qsort(xi, kinfo.n, sizeof *xi, gretl_compare_doubles);
355 	bw = kernel_bandwidth(xi, kinfo.n);
356 	kinfo.hvec[j] = bwscale * bw;
357 	kinfo.x = xi;
358 	kernel_xmin_xmax(&kinfo);
359 	if (j == 0) {
360 	    Xmin = kinfo.xmin;
361 	    Xmax = kinfo.xmax;
362 	} else {
363 	    if (kinfo.xmin < Xmin) Xmin = kinfo.xmin;
364 	    if (kinfo.xmax > Xmax) Xmax = kinfo.xmax;
365 	}
366     }
367 
368     /* number of points to use */
369     kinfo.kn = kernel_kn(kinfo.n);
370 
371     kinfo.xmin = Xmin;
372     kinfo.xmax = Xmax;
373     kinfo.xstep = (kinfo.xmax - kinfo.xmin) / kinfo.kn;
374     kinfo.type = (opt & OPT_O)? EPANECHNIKOV_KERNEL :
375 	GAUSSIAN_KERNEL;
376 
377     if (!*err) {
378 	m = multi_density_matrix(&kinfo, err);
379     }
380 
381     gretl_matrix_free(kinfo.X);
382     free(kinfo.hvec);
383 
384     return m;
385 }
386 
387 gretl_matrix *
kernel_density_matrix(const double * y,int n,double bwscale,gretlopt opt,int * err)388 kernel_density_matrix (const double *y, int n, double bwscale,
389 		       gretlopt opt, int *err)
390 {
391     gretl_matrix *m = NULL;
392     kernel_info kinfo = {0};
393 
394     kinfo.n = n;
395     kinfo.x = get_sorted_x(y, &kinfo.n, err);
396     if (*err) {
397 	return NULL;
398     }
399 
400     *err = set_kernel_params(&kinfo, bwscale, opt);
401 
402     if (!*err) {
403 	m = density_matrix(&kinfo, err);
404     }
405 
406     free(kinfo.x);
407 
408     return m;
409 }
410 
411 int
array_kernel_density(const double * x,int n,const char * label)412 array_kernel_density (const double *x, int n, const char *label)
413 {
414     kernel_info kinfo = {0};
415     int err = 0;
416 
417     if (n < MINOBS) {
418 	return E_TOOFEW;
419     }
420 
421     kinfo.x = (double *) x;
422     kinfo.n = n;
423 
424     err = set_kernel_params(&kinfo, 1.0, OPT_NONE);
425 
426     if (!err) {
427 	err = density_plot(&kinfo, label);
428     }
429 
430     return err;
431 }
432