1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "version.h"
22
23 #include <glib.h>
24
25 /* provides for one-way and two-way ANOVA */
26
27 struct anova {
28 int n; /* observations used */
29 int nt; /* number of treatment 'levels' */
30 int nb; /* number of 'blocks' (if applicable) */
31 double SST; /* total sum of squares */
32 double SSTr; /* treatment sum of squares */
33 double SSB; /* 'block' sum of squares (if applicable) */
34 double SSE; /* error sum of squares */
35 double F; /* F-test */
36 double pval; /* p-value for F-test */
37 double *cmeans; /* column means */
38 double *rmeans; /* row means */
39 int *ccount; /* column counts */
40 int *rcount; /* row counts */
41 double *tvec; /* workspace follows */
42 double *bvec;
43 gretl_matrix *tvals;
44 gretl_matrix *bvals;
45 };
46
anova_init(struct anova * v)47 static void anova_init (struct anova *v)
48 {
49 v->n = v->nt = v->nb = 0;
50 v->SST = v->SSTr = v->SSB = v->SSE = 0.0;
51 v->F = v->pval = NADBL;
52 v->cmeans = v->rmeans = NULL;
53 v->ccount = v->rcount = NULL;
54 v->tvec = v->bvec = NULL;
55 v->tvals = v->bvals = NULL;
56 }
57
anova_free(struct anova * v)58 static void anova_free (struct anova *v)
59 {
60 free(v->cmeans);
61 free(v->rmeans);
62 free(v->ccount);
63 free(v->rcount);
64 free(v->tvec);
65 free(v->bvec);
66
67 gretl_matrix_free(v->tvals);
68 gretl_matrix_free(v->bvals);
69 }
70
print_anova(struct anova * v,PRN * prn)71 static int print_anova (struct anova *v, PRN *prn)
72 {
73 int dftotal, dftreat, dfblock, dferr;
74 double mst, msr, mse;
75 int n, c1, c2, c3;
76
77 dftotal = v->n - 1;
78 dftreat = v->nt - 1;
79 dfblock = (v->nb > 0)? v->nb - 1 : 0;
80 dferr = dftotal - dftreat - dfblock;
81
82 pputs(prn, "\n\n");
83
84 c1 = g_utf8_strlen(_("Sum of squares"), -1);
85 c2 = g_utf8_strlen(_("df"), -1);
86 c3 = g_utf8_strlen(_("Mean square"), -1);
87
88 c1 = (c1 < 35)? 35 : c1;
89 c2 = (c2 > 8)? c2 + 1 : (c2 < 8)? 8 : c2;
90 c3 = (c3 > 16)? c3 + 1 : (c3 < 16)? 16 : c3;
91
92 /* header strings are right-aligned */
93 n = g_utf8_strlen(_("Sum of squares"), -1);
94 bufspace(c1 - n, prn);
95 pputs(prn, _("Sum of squares"));
96 n = g_utf8_strlen(_("df"), -1);
97 bufspace(c2 + 1 - n, prn);
98 pputs(prn, _("df"));
99 n = g_utf8_strlen(_("Mean square"), -1);
100 bufspace(c3 + 1 - n, prn);
101 pputs(prn, _("Mean square"));
102 pputs(prn, "\n\n");
103 c1 = 16;
104
105 /* Mean Square, treatment */
106 msr = v->SSTr / dftreat;
107 /* string left-aligned with initial offset of 2 */
108 n = g_utf8_strlen(_("Treatment"), -1);
109 bufspace(2, prn);
110 pputs(prn, _("Treatment"));
111 bufspace(16 - n, prn);
112 pprintf(prn, " %*g %*d %*g\n", c1, v->SSTr, c2, dftreat, c3, msr);
113
114 if (dfblock > 0) {
115 /* Mean Square, block */
116 double msb = v->SSB / dfblock;
117
118 /* string left-aligned with initial offset of 2 */
119 n = g_utf8_strlen(_("Block"), -1);
120 bufspace(2, prn);
121 pputs(prn, _("Block"));
122 bufspace(16 - n, prn);
123 pprintf(prn, " %*g %*d %*g\n", c1, v->SSB, c2, dfblock, c3, msb);
124 }
125
126 /* Mean Square, errors */
127 mse = v->SSE / dferr;
128 /* string left-aligned with initial offset of 2 */
129 n = g_utf8_strlen(_("Residual"), -1);
130 bufspace(2, prn);
131 pputs(prn, _("Residual"));
132 bufspace(16 - n, prn);
133 pprintf(prn, " %*g %*d %*g\n", c1, v->SSE, c2, dferr, c3, mse);
134
135 /* Mean Square, total */
136 mst = v->SST / dftotal;
137 /* string left-aligned with initial offset of 2 */
138 n = g_utf8_strlen(_("Total"), -1);
139 bufspace(2, prn);
140 pputs(prn, _("Total"));
141 bufspace(16 - n, prn);
142 pprintf(prn, " %*g %*d %*g\n", c1, v->SST, c2, dftotal, c3, mst);
143
144 pputc(prn, '\n');
145
146 if (na(v->F)) {
147 pprintf(prn, " F(%d, %d) = %g / %g (%s)\n\n", dftreat, dferr,
148 msr, mse, _("undefined"));
149 } else {
150 pprintf(prn, " F(%d, %d) = %g / %g = %g ",
151 dftreat, dferr, msr, mse, v->F);
152 if (v->pval < .0001) {
153 pprintf(prn, "[%s %.3g]\n\n", _("p-value"), v->pval);
154 } else if (!na(v->pval)) {
155 pprintf(prn, "[%s %.4f]\n\n", _("p-value"), v->pval);
156 }
157 }
158
159 return 0;
160 }
161
162 /* one-way anova only: print the mean and standard deviation
163 of the response at each level of treatment */
164
anova_print_means(struct anova * v,const double * xt,const double * y,double ybar,int t1,int t2,PRN * prn)165 static int anova_print_means (struct anova *v, const double *xt,
166 const double *y, double ybar,
167 int t1, int t2, PRN *prn)
168 {
169 double d, *csd = malloc(v->nt * sizeof *csd);
170 int c1, c2, c3, c4;
171 int i, t;
172
173 if (csd == NULL) {
174 return E_ALLOC;
175 }
176
177 for (i=0; i<v->nt; i++) {
178 csd[i] = 0.0;
179 }
180
181 for (t=t1; t<=t2; t++) {
182 if (!na(xt[t]) && !na(y[t])) {
183 for (i=0; i<v->nt; i++) {
184 if (xt[t] == v->tvals->val[i]) {
185 d = y[t] - v->cmeans[i];
186 csd[i] += d * d;
187 break;
188 }
189 }
190 }
191 }
192
193 c1 = g_utf8_strlen(_("Level"), -1);
194 c2 = g_utf8_strlen(_("n"), -1);
195 c3 = g_utf8_strlen(_("mean"), -1);
196 c4 = g_utf8_strlen(_("std. dev"), -1);
197
198 c1 = (c1 < 8)? 8 : c1;
199 c2 = (c2 > 6)? c2 + 1 : (c2 < 6)? 6 : c2;
200 c3 = (c3 > 10)? c3 + 1 : (c3 < 10)? 10 : c3;
201 c4 = (c4 > 12)? c4 + 1 : (c4 < 12)? 12 : c4;
202
203 pprintf(prn, " %-*s %*s %*s %*s\n\n", c1, _("Level"), c2, _("n"),
204 c3, _("mean"), c4, _("std. dev"));
205
206 for (i=0; i<v->nt; i++) {
207 if (v->ccount[i] > 1) {
208 csd[i] /= v->ccount[i] - 1;
209 csd[i] = sqrt(csd[i]);
210 pprintf(prn, " %-*g %*d %*g %#*.5g\n", c1, v->tvals->val[i],
211 c2, v->ccount[i], c3, v->cmeans[i], c4, csd[i]);
212 } else {
213 pprintf(prn, " %-*g %*d %*g %*s\n", c1, v->tvals->val[i],
214 c2, v->ccount[i], c3, v->cmeans[i], c4, "NA");
215 }
216 }
217
218 pprintf(prn, "\n %s = %g\n\n", _("Grand mean"), ybar);
219
220 free(csd);
221
222 return 0;
223 }
224
225 /* allocate arrays to hold the valid, in-sample values of the
226 treatment and block variables */
227
anova_make_arrays(const double * xb,struct anova * v)228 static int anova_make_arrays (const double *xb, struct anova *v)
229 {
230 int err = 0;
231
232 v->tvec = malloc(v->n * sizeof *v->tvec);
233
234 if (v->tvec == NULL) {
235 err = E_ALLOC;
236 } else if (xb != NULL) {
237 v->bvec = malloc(v->n * sizeof *v->bvec);
238 if (v->bvec == NULL) {
239 free(v->tvec);
240 v->tvec = NULL;
241 err = E_ALLOC;
242 }
243 }
244
245 return err;
246 }
247
248 /* construct vectors holding the distinct values of the
249 treatment and block variables */
250
anova_make_value_vecs(struct anova * v)251 static int anova_make_value_vecs (struct anova *v)
252 {
253 int err = 0;
254
255 v->tvals = gretl_matrix_values(v->tvec, v->n, OPT_S, &err);
256
257 if (!err && v->tvals->rows < 2) {
258 gretl_errmsg_set("Insufficient observations");
259 err = E_DATA;
260 }
261
262 if (!err && v->bvec != NULL) {
263 v->bvals = gretl_matrix_values(v->bvec, v->n, OPT_S, &err);
264 if (!err && v->bvals->rows < 2) {
265 gretl_errmsg_set("Insufficient observations");
266 err = E_DATA;
267 }
268 }
269
270 if (!err) {
271 v->nt = v->tvals->rows;
272 if (v->bvals != NULL) {
273 v->nb = v->bvals->rows;
274 }
275 }
276
277 return err;
278 }
279
280 /* allocate and initialize arrays to calculate the column
281 and row means of the ANOVA table */
282
anova_accounting_arrays(struct anova * v)283 static int anova_accounting_arrays (struct anova *v)
284 {
285 int i;
286
287 v->cmeans = malloc(v->nt * sizeof *v->cmeans);
288 v->ccount = malloc(v->nt * sizeof *v->ccount);
289
290 if (v->cmeans == NULL || v->ccount == NULL) {
291 return E_ALLOC;
292 }
293
294 for (i=0; i<v->nt; i++) {
295 v->cmeans[i] = 0.0;
296 v->ccount[i] = 0;
297 }
298
299 if (v->nb > 0) {
300 v->rmeans = malloc(v->nb * sizeof *v->rmeans);
301 v->rcount = malloc(v->nb * sizeof *v->rcount);
302
303 if (v->rmeans == NULL || v->rcount == NULL) {
304 return E_ALLOC;
305 }
306
307 for (i=0; i<v->nb; i++) {
308 v->rmeans[i] = 0.0;
309 v->rcount[i] = 0;
310 }
311 }
312
313 return 0;
314 }
315
anova_add_F_stat(struct anova * v)316 static void anova_add_F_stat (struct anova *v)
317 {
318 int dfn = v->nt - 1;
319 int dfd = v->n - 1 - dfn;
320 double MSE, MSTr = v->SSTr / dfn;
321
322 if (v->nb > 0) {
323 dfd -= v->nb - 1;
324 }
325
326 MSE = v->SSE / dfd;
327
328 if (MSE > 0) {
329 v->F = MSTr / MSE;
330 v->pval = snedecor_cdf_comp(dfn, dfd, v->F);
331 }
332
333 record_test_result(v->F, v->pval);
334 }
335
336 #define anova_obs_ok(y,x,z,t) (!na(y[t]) && !na(x[t]) && \
337 (z == NULL || !na(z[t])))
338
339 /* For one-way anova @list contains response and treatment; for
340 two-way it should in addition contain the block variable.
341 @opt can contain OPT_Q to suppress printing.
342 */
343
gretl_anova(const int * list,const DATASET * dset,gretlopt opt,PRN * prn)344 int gretl_anova (const int *list, const DATASET *dset,
345 gretlopt opt, PRN *prn)
346 {
347 struct anova v;
348 const double *y, *xt, *xb;
349 double ybar, dev;
350 int i, t, t1, t2;
351 int missvals = 0;
352 int err = 0;
353
354 if (list[0] < 2 || list[0] > 3) {
355 return E_DATA;
356 }
357
358 anova_init(&v);
359
360 t1 = dset->t1;
361 t2 = dset->t2;
362 list_adjust_sample(list, &t1, &t2, dset, &missvals);
363
364 v.n = t2 - t1 + 1 - missvals;
365 if (v.n < 2) {
366 return E_TOOFEW;
367 }
368
369 y = dset->Z[list[1]];
370 xt = dset->Z[list[2]];
371 xb = (list[0] == 3)? dset->Z[list[3]] : NULL;
372
373 /* check that treatment (and block, if present) are discrete */
374
375 if (!accept_as_discrete(dset, list[2], 0)) {
376 gretl_errmsg_set(_("anova: the treatment variable must be discrete"));
377 err = E_DATA;
378 } else if (xb != NULL && !accept_as_discrete(dset, list[3], 0)) {
379 gretl_errmsg_set(_("anova: the block variable must be discrete"));
380 err = E_DATA;
381 }
382
383 if (err) {
384 return err;
385 }
386
387 v.n = 0;
388 for (t=t1; t<=t2; t++) {
389 if (anova_obs_ok(y, xt, xb, t)) {
390 v.n += 1;
391 }
392 }
393
394 if (v.n < 2) {
395 return E_TOOFEW;
396 }
397
398 err = anova_make_arrays(xb, &v);
399 if (err) {
400 return err;
401 }
402
403 /* fill tvec and bvec; calculate grand mean */
404
405 ybar = 0.0;
406 i = 0;
407 for (t=t1; t<=t2; t++) {
408 if (anova_obs_ok(y, xt, xb, t)) {
409 v.tvec[i] = xt[t];
410 ybar += y[t];
411 if (v.bvec != NULL) {
412 v.bvec[i] = xb[t];
413 }
414 i++;
415 }
416 }
417
418 ybar /= v.n;
419
420 err = anova_make_value_vecs(&v);
421 if (err) {
422 goto bailout;
423 }
424
425 err = anova_accounting_arrays(&v);
426 if (err) {
427 goto bailout;
428 }
429
430 /* find column (treatment) means */
431
432 for (t=t1; t<=t2; t++) {
433 if (anova_obs_ok(y, xt, xb, t)) {
434 dev = y[t] - ybar;
435 v.SST += dev * dev;
436 for (i=0; i<v.nt; i++) {
437 if (xt[t] == v.tvals->val[i]) {
438 v.cmeans[i] += y[t];
439 v.ccount[i] += 1;
440 break;
441 }
442 }
443 }
444 }
445
446 for (i=0; i<v.nt; i++) {
447 v.cmeans[i] /= v.ccount[i];
448 }
449
450 /* sums of squares */
451
452 if (v.nb > 0) {
453 /* two-way ANOVA */
454 for (t=t1; t<=t2; t++) {
455 if (anova_obs_ok(y, xt, xb, t)) {
456 for (i=0; i<v.nb; i++) {
457 if (xb[t] == v.bvals->val[i]) {
458 v.rmeans[i] += y[t];
459 v.rcount[i] += 1;
460 break;
461 }
462 }
463 }
464 }
465
466 for (i=0; i<v.nb; i++) {
467 v.rmeans[i] /= v.rcount[i];
468 dev = v.rmeans[i] - ybar;
469 v.SSB += dev * dev * v.rcount[i];
470 }
471
472 for (i=0; i<v.nt; i++) {
473 dev = v.cmeans[i] - ybar;
474 v.SSTr += dev * dev * v.ccount[i];
475 }
476
477 v.SSE = v.SST - v.SSTr - v.SSB;
478 } else {
479 /* one-way ANOVA */
480 for (t=t1; t<=t2; t++) {
481 if (!na(xt[t]) && !na(y[t])) {
482 for (i=0; i<v.nt; i++) {
483 if (xt[t] == v.tvals->val[i]) {
484 dev = y[t] - v.cmeans[i];
485 v.SSE += dev * dev;
486 break;
487 }
488 }
489 }
490 }
491 v.SSTr = v.SST - v.SSE;
492 }
493
494 anova_add_F_stat(&v);
495
496 if (!(opt & OPT_Q)) {
497 const char *yname = dset->varname[list[1]];
498 const char *tname = dset->varname[list[2]];
499
500 pputc(prn, '\n');
501 pprintf(prn, _("%s, response = %s, treatment = %s:"),
502 _("Analysis of Variance"), yname, tname);
503
504 err = print_anova(&v, prn);
505
506 if (!err && v.nb == 0) {
507 anova_print_means(&v, xt, y, ybar, t1, t2, prn);
508 }
509 }
510
511 bailout:
512
513 anova_free(&v);
514
515 return err;
516 }
517