1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "version.h"
22 #include "nonparam.h"
23
24 #define KDEBUG 0
25
26 /* For discussion of kernel density estimation see Davidson and
27 MacKinnon, Econometric Theory and Methods, Section 15.5.
28 */
29
30 #define ROOT5 2.23606797749979 /* sqrt(5) */
31 #define EPMULT 0.3354101966249685 /* 3 over (4 * sqrt(5)) */
32
33 enum {
34 GAUSSIAN_KERNEL,
35 EPANECHNIKOV_KERNEL
36 };
37
38 typedef struct kernel_info_ kernel_info;
39
40 struct kernel_info_ {
41 int type; /* Gaussian or Epanechnikov */
42 double *x; /* single data array */
43 gretl_matrix *X; /* for multiple data */
44 int n; /* number of elements in x */
45 int kn; /* number of points to use */
46 double h; /* single bandwidth */
47 double *hvec; /* multiple bandwidths */
48 double xmin;
49 double xmax;
50 double xstep;
51 };
52
ep_pdf(double z)53 static double ep_pdf (double z)
54 {
55 if (fabs(z) >= ROOT5) {
56 return 0.0;
57 } else {
58 return EPMULT * (1.0 - z * z / 5.0);
59 }
60 }
61
kernel(kernel_info * kinfo,double x0,int j)62 static double kernel (kernel_info *kinfo, double x0, int j)
63 {
64 double h, den = 0.0;
65 int in_range = 0;
66 int i;
67
68 if (kinfo->hvec != NULL) {
69 h = kinfo->hvec[j];
70 } else {
71 h = kinfo->h;
72 }
73
74 for (i=0; i<kinfo->n; i++) {
75 double z = (x0 - kinfo->x[i]) / h;
76
77 if (kinfo->type == GAUSSIAN_KERNEL) {
78 den += normal_pdf(z);
79 } else {
80 double dt = ep_pdf(z);
81
82 if (!in_range && dt > 0) {
83 in_range = 1;
84 } else if (in_range && dt == 0.0) {
85 break;
86 }
87
88 den += ep_pdf(z);
89 }
90 }
91
92 den /= h * kinfo->n;
93
94 return den;
95 }
96
density_plot(kernel_info * kinfo,const char * vname)97 static int density_plot (kernel_info *kinfo, const char *vname)
98 {
99 FILE *fp;
100 gchar *tmp = NULL;
101 double xt, xdt;
102 int t, err = 0;
103
104 fp = open_plot_input_file(PLOT_KERNEL, 0, &err);
105 if (err) {
106 return err;
107 }
108
109 gretl_push_c_numeric_locale();
110
111 fputs("set nokey\n", fp);
112 fprintf(fp, "set xrange [%g:%g]\n", kinfo->xmin, kinfo->xmax);
113
114 fputs("# literal lines = 2\n", fp);
115 fprintf(fp, "set label \"%s\" at graph .65, graph .97\n",
116 (kinfo->type == GAUSSIAN_KERNEL)? _("Gaussian kernel") :
117 _("Epanechnikov kernel"));
118 tmp = g_strdup_printf(_("bandwidth = %g"), kinfo->h);
119 fprintf(fp, "set label \"%s\" at graph .65, graph .93\n", tmp);
120 g_free(tmp);
121
122 tmp = g_strdup_printf(_("Estimated density of %s"), vname);
123 fprintf(fp, "set title \"%s\"\n", tmp);
124 g_free(tmp);
125
126 fputs("plot \\\n'-' using 1:2 w lines\n", fp);
127
128 xt = kinfo->xmin;
129 for (t=0; t<=kinfo->kn; t++) {
130 xdt = kernel(kinfo, xt, 0);
131 fprintf(fp, "%g %g\n", xt, xdt);
132 xt += kinfo->xstep;
133 }
134 fputs("e\n", fp);
135
136 gretl_pop_c_numeric_locale();
137
138 return finalize_plot_input_file(fp);
139 }
140
density_matrix(kernel_info * kinfo,int * err)141 static gretl_matrix *density_matrix (kernel_info *kinfo,
142 int *err)
143 {
144 gretl_matrix *m;
145 double xt, xdt;
146 int t;
147
148 m = gretl_matrix_alloc(kinfo->kn + 1, 2);
149 if (m == NULL) {
150 *err = E_ALLOC;
151 return NULL;
152 }
153
154 xt = kinfo->xmin;
155 for (t=0; t<=kinfo->kn; t++) {
156 xdt = kernel(kinfo, xt, 0);
157 gretl_matrix_set(m, t, 0, xt);
158 gretl_matrix_set(m, t, 1, xdt);
159 xt += kinfo->xstep;
160 }
161
162 return m;
163 }
164
multi_density_matrix(kernel_info * kinfo,int * err)165 static gretl_matrix *multi_density_matrix (kernel_info *kinfo,
166 int *err)
167 {
168 gretl_matrix *m;
169 double xt, xdtj;
170 int nc = kinfo->X->cols;
171 int t, j;
172
173 m = gretl_matrix_alloc(kinfo->kn + 1, 1 + nc);
174 if (m == NULL) {
175 *err = E_ALLOC;
176 return NULL;
177 }
178
179 xt = kinfo->xmin;
180 for (t=0; t<=kinfo->kn; t++) {
181 gretl_matrix_set(m, t, 0, xt);
182 for (j=0; j<nc; j++) {
183 kinfo->x = kinfo->X->val + j * kinfo->n;
184 xdtj = kernel(kinfo, xt, j);
185 gretl_matrix_set(m, t, j+1, xdtj);
186 }
187 xt += kinfo->xstep;
188 }
189
190 return m;
191 }
192
kernel_xmin_xmax(kernel_info * kinfo)193 static int kernel_xmin_xmax (kernel_info *kinfo)
194 {
195 const double *x = kinfo->x;
196 double xbar, sdx, xm4, xp4;
197 int err, n = kinfo->n;
198
199 err = gretl_moments(0, n - 1, kinfo->x, NULL,
200 &xbar, &sdx, NULL, NULL, 1);
201 if (err) {
202 return err;
203 }
204
205 xm4 = xbar - 4.0 * sdx;
206 xp4 = xbar + 4.0 * sdx;
207
208 if (xp4 > x[n-1]) {
209 kinfo->xmax = xp4;
210 } else {
211 kinfo->xmax = x[n-1];
212 }
213
214 if (xm4 < x[0]) {
215 kinfo->xmin = xm4;
216 } else {
217 kinfo->xmin = x[0];
218 }
219
220 if (kinfo->xmin < 0.0 && x[0] >= 0.0) {
221 /* if data are non-negative, don't set a negative min */
222 kinfo->xmin = x[0];
223 }
224
225 kinfo->xstep = (kinfo->xmax - kinfo->xmin) / kinfo->kn;
226
227 return 0;
228 }
229
kernel_kn(int nobs)230 static int kernel_kn (int nobs)
231 {
232 if (nobs >= 1000) {
233 return 1000;
234 } else if (nobs >= 200) {
235 return 200;
236 } else if (nobs >= 100) {
237 return 100;
238 } else {
239 return 50;
240 }
241 }
242
set_kernel_params(kernel_info * kinfo,double bwscale,gretlopt opt)243 static int set_kernel_params (kernel_info *kinfo,
244 double bwscale,
245 gretlopt opt)
246 {
247 double bw = kernel_bandwidth(kinfo->x, kinfo->n);
248 int err = 0;
249
250 kinfo->h = bwscale * bw;
251
252 if (kinfo->h <= 0.0) {
253 return E_DATA;
254 }
255
256 /* number of points to use */
257 kinfo->kn = kernel_kn(kinfo->n);
258
259 /* range to use */
260 err = kernel_xmin_xmax(kinfo);
261
262 kinfo->type = (opt & OPT_O)? EPANECHNIKOV_KERNEL :
263 GAUSSIAN_KERNEL;
264
265 return err;
266 }
267
268 #define MINOBS 30
269
get_sorted_x(const double * y,int * pn,int * err)270 static double *get_sorted_x (const double *y, int *pn, int *err)
271 {
272 double *x = malloc(*pn * sizeof *x);
273 int i, n = 0;
274
275 if (x == NULL) {
276 *err = E_ALLOC;
277 return NULL;
278 }
279
280 for (i=0; i<*pn; i++) {
281 if (!na(y[i])) {
282 x[n++] = y[i];
283 }
284 }
285
286 if (n < MINOBS) {
287 *err = E_TOOFEW;
288 free(x);
289 x = NULL;
290 } else {
291 qsort(x, n, sizeof *x, gretl_compare_doubles);
292 *pn = n;
293 }
294
295 return x;
296 }
297
298 int
kernel_density(const double * y,int n,double bwscale,const char * label,gretlopt opt)299 kernel_density (const double *y, int n, double bwscale,
300 const char *label, gretlopt opt)
301 {
302 kernel_info kinfo = {0};
303 int err = 0;
304
305 kinfo.n = n;
306 kinfo.x = get_sorted_x(y, &kinfo.n, &err);
307 if (err) {
308 return err;
309 }
310
311 err = set_kernel_params(&kinfo, bwscale, opt);
312
313 if (!err) {
314 err = density_plot(&kinfo, label);
315 }
316
317 free(kinfo.x);
318
319 return err;
320 }
321
322 gretl_matrix *
multiple_kd_matrix(const gretl_matrix * X,double bwscale,gretlopt opt,int * err)323 multiple_kd_matrix (const gretl_matrix *X, double bwscale,
324 gretlopt opt, int *err)
325 {
326 double Xmin = 0, Xmax = 0;
327 double bw, *xi = NULL;
328 gretl_matrix *m = NULL;
329 kernel_info kinfo = {0};
330 int j;
331
332 kinfo.n = X->rows;
333 if (kinfo.n < MINOBS) {
334 *err = E_TOOFEW;
335 return NULL;
336 }
337
338 kinfo.X = gretl_matrix_copy(X);
339 if (kinfo.X == NULL) {
340 *err = E_ALLOC;
341 return NULL;
342 }
343
344 kinfo.hvec = malloc(X->cols * sizeof *kinfo.hvec);
345 if (kinfo.hvec == NULL) {
346 *err = E_ALLOC;
347 gretl_matrix_free(kinfo.X);
348 return NULL;
349 }
350
351 /* sort Xs per column, get bandwidths and extrema */
352 for (j=0; j<X->cols; j++) {
353 xi = kinfo.X->val + kinfo.n * j;
354 qsort(xi, kinfo.n, sizeof *xi, gretl_compare_doubles);
355 bw = kernel_bandwidth(xi, kinfo.n);
356 kinfo.hvec[j] = bwscale * bw;
357 kinfo.x = xi;
358 kernel_xmin_xmax(&kinfo);
359 if (j == 0) {
360 Xmin = kinfo.xmin;
361 Xmax = kinfo.xmax;
362 } else {
363 if (kinfo.xmin < Xmin) Xmin = kinfo.xmin;
364 if (kinfo.xmax > Xmax) Xmax = kinfo.xmax;
365 }
366 }
367
368 /* number of points to use */
369 kinfo.kn = kernel_kn(kinfo.n);
370
371 kinfo.xmin = Xmin;
372 kinfo.xmax = Xmax;
373 kinfo.xstep = (kinfo.xmax - kinfo.xmin) / kinfo.kn;
374 kinfo.type = (opt & OPT_O)? EPANECHNIKOV_KERNEL :
375 GAUSSIAN_KERNEL;
376
377 if (!*err) {
378 m = multi_density_matrix(&kinfo, err);
379 }
380
381 gretl_matrix_free(kinfo.X);
382 free(kinfo.hvec);
383
384 return m;
385 }
386
387 gretl_matrix *
kernel_density_matrix(const double * y,int n,double bwscale,gretlopt opt,int * err)388 kernel_density_matrix (const double *y, int n, double bwscale,
389 gretlopt opt, int *err)
390 {
391 gretl_matrix *m = NULL;
392 kernel_info kinfo = {0};
393
394 kinfo.n = n;
395 kinfo.x = get_sorted_x(y, &kinfo.n, err);
396 if (*err) {
397 return NULL;
398 }
399
400 *err = set_kernel_params(&kinfo, bwscale, opt);
401
402 if (!*err) {
403 m = density_matrix(&kinfo, err);
404 }
405
406 free(kinfo.x);
407
408 return m;
409 }
410
411 int
array_kernel_density(const double * x,int n,const char * label)412 array_kernel_density (const double *x, int n, const char *label)
413 {
414 kernel_info kinfo = {0};
415 int err = 0;
416
417 if (n < MINOBS) {
418 return E_TOOFEW;
419 }
420
421 kinfo.x = (double *) x;
422 kinfo.n = n;
423
424 err = set_kernel_params(&kinfo, 1.0, OPT_NONE);
425
426 if (!err) {
427 err = density_plot(&kinfo, label);
428 }
429
430 return err;
431 }
432