/*
* gretl -- Gnu Regression, Econometrics and Time-series Library
* Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*
*/
#include "libgretl.h"
#include "version.h"
#include
/* provides for one-way and two-way ANOVA */
struct anova {
int n; /* observations used */
int nt; /* number of treatment 'levels' */
int nb; /* number of 'blocks' (if applicable) */
double SST; /* total sum of squares */
double SSTr; /* treatment sum of squares */
double SSB; /* 'block' sum of squares (if applicable) */
double SSE; /* error sum of squares */
double F; /* F-test */
double pval; /* p-value for F-test */
double *cmeans; /* column means */
double *rmeans; /* row means */
int *ccount; /* column counts */
int *rcount; /* row counts */
double *tvec; /* workspace follows */
double *bvec;
gretl_matrix *tvals;
gretl_matrix *bvals;
};
static void anova_init (struct anova *v)
{
v->n = v->nt = v->nb = 0;
v->SST = v->SSTr = v->SSB = v->SSE = 0.0;
v->F = v->pval = NADBL;
v->cmeans = v->rmeans = NULL;
v->ccount = v->rcount = NULL;
v->tvec = v->bvec = NULL;
v->tvals = v->bvals = NULL;
}
static void anova_free (struct anova *v)
{
free(v->cmeans);
free(v->rmeans);
free(v->ccount);
free(v->rcount);
free(v->tvec);
free(v->bvec);
gretl_matrix_free(v->tvals);
gretl_matrix_free(v->bvals);
}
static int print_anova (struct anova *v, PRN *prn)
{
int dftotal, dftreat, dfblock, dferr;
double mst, msr, mse;
int n, c1, c2, c3;
dftotal = v->n - 1;
dftreat = v->nt - 1;
dfblock = (v->nb > 0)? v->nb - 1 : 0;
dferr = dftotal - dftreat - dfblock;
pputs(prn, "\n\n");
c1 = g_utf8_strlen(_("Sum of squares"), -1);
c2 = g_utf8_strlen(_("df"), -1);
c3 = g_utf8_strlen(_("Mean square"), -1);
c1 = (c1 < 35)? 35 : c1;
c2 = (c2 > 8)? c2 + 1 : (c2 < 8)? 8 : c2;
c3 = (c3 > 16)? c3 + 1 : (c3 < 16)? 16 : c3;
/* header strings are right-aligned */
n = g_utf8_strlen(_("Sum of squares"), -1);
bufspace(c1 - n, prn);
pputs(prn, _("Sum of squares"));
n = g_utf8_strlen(_("df"), -1);
bufspace(c2 + 1 - n, prn);
pputs(prn, _("df"));
n = g_utf8_strlen(_("Mean square"), -1);
bufspace(c3 + 1 - n, prn);
pputs(prn, _("Mean square"));
pputs(prn, "\n\n");
c1 = 16;
/* Mean Square, treatment */
msr = v->SSTr / dftreat;
/* string left-aligned with initial offset of 2 */
n = g_utf8_strlen(_("Treatment"), -1);
bufspace(2, prn);
pputs(prn, _("Treatment"));
bufspace(16 - n, prn);
pprintf(prn, " %*g %*d %*g\n", c1, v->SSTr, c2, dftreat, c3, msr);
if (dfblock > 0) {
/* Mean Square, block */
double msb = v->SSB / dfblock;
/* string left-aligned with initial offset of 2 */
n = g_utf8_strlen(_("Block"), -1);
bufspace(2, prn);
pputs(prn, _("Block"));
bufspace(16 - n, prn);
pprintf(prn, " %*g %*d %*g\n", c1, v->SSB, c2, dfblock, c3, msb);
}
/* Mean Square, errors */
mse = v->SSE / dferr;
/* string left-aligned with initial offset of 2 */
n = g_utf8_strlen(_("Residual"), -1);
bufspace(2, prn);
pputs(prn, _("Residual"));
bufspace(16 - n, prn);
pprintf(prn, " %*g %*d %*g\n", c1, v->SSE, c2, dferr, c3, mse);
/* Mean Square, total */
mst = v->SST / dftotal;
/* string left-aligned with initial offset of 2 */
n = g_utf8_strlen(_("Total"), -1);
bufspace(2, prn);
pputs(prn, _("Total"));
bufspace(16 - n, prn);
pprintf(prn, " %*g %*d %*g\n", c1, v->SST, c2, dftotal, c3, mst);
pputc(prn, '\n');
if (na(v->F)) {
pprintf(prn, " F(%d, %d) = %g / %g (%s)\n\n", dftreat, dferr,
msr, mse, _("undefined"));
} else {
pprintf(prn, " F(%d, %d) = %g / %g = %g ",
dftreat, dferr, msr, mse, v->F);
if (v->pval < .0001) {
pprintf(prn, "[%s %.3g]\n\n", _("p-value"), v->pval);
} else if (!na(v->pval)) {
pprintf(prn, "[%s %.4f]\n\n", _("p-value"), v->pval);
}
}
return 0;
}
/* one-way anova only: print the mean and standard deviation
of the response at each level of treatment */
static int anova_print_means (struct anova *v, const double *xt,
const double *y, double ybar,
int t1, int t2, PRN *prn)
{
double d, *csd = malloc(v->nt * sizeof *csd);
int c1, c2, c3, c4;
int i, t;
if (csd == NULL) {
return E_ALLOC;
}
for (i=0; int; i++) {
csd[i] = 0.0;
}
for (t=t1; t<=t2; t++) {
if (!na(xt[t]) && !na(y[t])) {
for (i=0; int; i++) {
if (xt[t] == v->tvals->val[i]) {
d = y[t] - v->cmeans[i];
csd[i] += d * d;
break;
}
}
}
}
c1 = g_utf8_strlen(_("Level"), -1);
c2 = g_utf8_strlen(_("n"), -1);
c3 = g_utf8_strlen(_("mean"), -1);
c4 = g_utf8_strlen(_("std. dev"), -1);
c1 = (c1 < 8)? 8 : c1;
c2 = (c2 > 6)? c2 + 1 : (c2 < 6)? 6 : c2;
c3 = (c3 > 10)? c3 + 1 : (c3 < 10)? 10 : c3;
c4 = (c4 > 12)? c4 + 1 : (c4 < 12)? 12 : c4;
pprintf(prn, " %-*s %*s %*s %*s\n\n", c1, _("Level"), c2, _("n"),
c3, _("mean"), c4, _("std. dev"));
for (i=0; int; i++) {
if (v->ccount[i] > 1) {
csd[i] /= v->ccount[i] - 1;
csd[i] = sqrt(csd[i]);
pprintf(prn, " %-*g %*d %*g %#*.5g\n", c1, v->tvals->val[i],
c2, v->ccount[i], c3, v->cmeans[i], c4, csd[i]);
} else {
pprintf(prn, " %-*g %*d %*g %*s\n", c1, v->tvals->val[i],
c2, v->ccount[i], c3, v->cmeans[i], c4, "NA");
}
}
pprintf(prn, "\n %s = %g\n\n", _("Grand mean"), ybar);
free(csd);
return 0;
}
/* allocate arrays to hold the valid, in-sample values of the
treatment and block variables */
static int anova_make_arrays (const double *xb, struct anova *v)
{
int err = 0;
v->tvec = malloc(v->n * sizeof *v->tvec);
if (v->tvec == NULL) {
err = E_ALLOC;
} else if (xb != NULL) {
v->bvec = malloc(v->n * sizeof *v->bvec);
if (v->bvec == NULL) {
free(v->tvec);
v->tvec = NULL;
err = E_ALLOC;
}
}
return err;
}
/* construct vectors holding the distinct values of the
treatment and block variables */
static int anova_make_value_vecs (struct anova *v)
{
int err = 0;
v->tvals = gretl_matrix_values(v->tvec, v->n, OPT_S, &err);
if (!err && v->tvals->rows < 2) {
gretl_errmsg_set("Insufficient observations");
err = E_DATA;
}
if (!err && v->bvec != NULL) {
v->bvals = gretl_matrix_values(v->bvec, v->n, OPT_S, &err);
if (!err && v->bvals->rows < 2) {
gretl_errmsg_set("Insufficient observations");
err = E_DATA;
}
}
if (!err) {
v->nt = v->tvals->rows;
if (v->bvals != NULL) {
v->nb = v->bvals->rows;
}
}
return err;
}
/* allocate and initialize arrays to calculate the column
and row means of the ANOVA table */
static int anova_accounting_arrays (struct anova *v)
{
int i;
v->cmeans = malloc(v->nt * sizeof *v->cmeans);
v->ccount = malloc(v->nt * sizeof *v->ccount);
if (v->cmeans == NULL || v->ccount == NULL) {
return E_ALLOC;
}
for (i=0; int; i++) {
v->cmeans[i] = 0.0;
v->ccount[i] = 0;
}
if (v->nb > 0) {
v->rmeans = malloc(v->nb * sizeof *v->rmeans);
v->rcount = malloc(v->nb * sizeof *v->rcount);
if (v->rmeans == NULL || v->rcount == NULL) {
return E_ALLOC;
}
for (i=0; inb; i++) {
v->rmeans[i] = 0.0;
v->rcount[i] = 0;
}
}
return 0;
}
static void anova_add_F_stat (struct anova *v)
{
int dfn = v->nt - 1;
int dfd = v->n - 1 - dfn;
double MSE, MSTr = v->SSTr / dfn;
if (v->nb > 0) {
dfd -= v->nb - 1;
}
MSE = v->SSE / dfd;
if (MSE > 0) {
v->F = MSTr / MSE;
v->pval = snedecor_cdf_comp(dfn, dfd, v->F);
}
record_test_result(v->F, v->pval);
}
#define anova_obs_ok(y,x,z,t) (!na(y[t]) && !na(x[t]) && \
(z == NULL || !na(z[t])))
/* For one-way anova @list contains response and treatment; for
two-way it should in addition contain the block variable.
@opt can contain OPT_Q to suppress printing.
*/
int gretl_anova (const int *list, const DATASET *dset,
gretlopt opt, PRN *prn)
{
struct anova v;
const double *y, *xt, *xb;
double ybar, dev;
int i, t, t1, t2;
int missvals = 0;
int err = 0;
if (list[0] < 2 || list[0] > 3) {
return E_DATA;
}
anova_init(&v);
t1 = dset->t1;
t2 = dset->t2;
list_adjust_sample(list, &t1, &t2, dset, &missvals);
v.n = t2 - t1 + 1 - missvals;
if (v.n < 2) {
return E_TOOFEW;
}
y = dset->Z[list[1]];
xt = dset->Z[list[2]];
xb = (list[0] == 3)? dset->Z[list[3]] : NULL;
/* check that treatment (and block, if present) are discrete */
if (!accept_as_discrete(dset, list[2], 0)) {
gretl_errmsg_set(_("anova: the treatment variable must be discrete"));
err = E_DATA;
} else if (xb != NULL && !accept_as_discrete(dset, list[3], 0)) {
gretl_errmsg_set(_("anova: the block variable must be discrete"));
err = E_DATA;
}
if (err) {
return err;
}
v.n = 0;
for (t=t1; t<=t2; t++) {
if (anova_obs_ok(y, xt, xb, t)) {
v.n += 1;
}
}
if (v.n < 2) {
return E_TOOFEW;
}
err = anova_make_arrays(xb, &v);
if (err) {
return err;
}
/* fill tvec and bvec; calculate grand mean */
ybar = 0.0;
i = 0;
for (t=t1; t<=t2; t++) {
if (anova_obs_ok(y, xt, xb, t)) {
v.tvec[i] = xt[t];
ybar += y[t];
if (v.bvec != NULL) {
v.bvec[i] = xb[t];
}
i++;
}
}
ybar /= v.n;
err = anova_make_value_vecs(&v);
if (err) {
goto bailout;
}
err = anova_accounting_arrays(&v);
if (err) {
goto bailout;
}
/* find column (treatment) means */
for (t=t1; t<=t2; t++) {
if (anova_obs_ok(y, xt, xb, t)) {
dev = y[t] - ybar;
v.SST += dev * dev;
for (i=0; ival[i]) {
v.cmeans[i] += y[t];
v.ccount[i] += 1;
break;
}
}
}
}
for (i=0; i 0) {
/* two-way ANOVA */
for (t=t1; t<=t2; t++) {
if (anova_obs_ok(y, xt, xb, t)) {
for (i=0; ival[i]) {
v.rmeans[i] += y[t];
v.rcount[i] += 1;
break;
}
}
}
}
for (i=0; ival[i]) {
dev = y[t] - v.cmeans[i];
v.SSE += dev * dev;
break;
}
}
}
}
v.SSTr = v.SST - v.SSE;
}
anova_add_F_stat(&v);
if (!(opt & OPT_Q)) {
const char *yname = dset->varname[list[1]];
const char *tname = dset->varname[list[2]];
pputc(prn, '\n');
pprintf(prn, _("%s, response = %s, treatment = %s:"),
_("Analysis of Variance"), yname, tname);
err = print_anova(&v, prn);
if (!err && v.nb == 0) {
anova_print_means(&v, xt, y, ybar, t1, t2, prn);
}
}
bailout:
anova_free(&v);
return err;
}