/* gretl - The Gnu Regression, Econometrics and Time-series Library * Copyright (C) 1999-2002 Allin Cottrell * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this software; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /* nistcheck -- program to check libgretl estimation functions against the NIST reference datasets for linear regression. */ #include "libgretl.h" #include "version.h" #include #include #ifdef LONGLEY_ONLY # define MAX_DIGITS 12 #else # define MAX_DIGITS 12 #endif #define MIN_DIGITS 4 #define MP_CHECK_DIGITS 12 #define MP_PRINT_DIGITS 15 #define LIBGRETLSTR "Standard libgretl:" static int verbose; static int noint; static char datadir[FILENAME_MAX]; typedef struct mp_results_ mp_results; struct mp_results_ { int ncoeff; double *coeff; double *sderr; double sigma; double ess; double rsq; double fstt; }; void free_mp_results (mp_results *mpvals) { if (mpvals != NULL) { free(mpvals->coeff); free(mpvals->sderr); free(mpvals); } } mp_results *mp_results_new (int nc) { mp_results *mpvals; int i; mpvals = malloc(sizeof *mpvals); if (mpvals == NULL) { return NULL; } mpvals->coeff = NULL; mpvals->sderr = NULL; mpvals->ncoeff = nc; mpvals->coeff = malloc(nc * sizeof *mpvals->coeff); mpvals->sderr = malloc(nc * sizeof *mpvals->sderr); if (mpvals->coeff == NULL || mpvals->sderr == NULL) { free_mp_results(mpvals); return NULL; } for (i=0; icoeff[i] = NADBL; mpvals->sderr[i] = NADBL; } mpvals->sigma = mpvals->ess = NADBL; mpvals->rsq = mpvals->fstt = NADBL; return mpvals; } /* special stuff: Noint1, NoInt2: no intercept, use alternative R^2 calculation Filip: create powers of x, 2 through 10 Wampler1 - Wampler5: create powers of x, 2 to 5 */ static int get_data_digits (const char *s) { int digits = 0; while (*s == '-' || *s == '.' || *s == ',' || isdigit((unsigned char) *s)) { if (isdigit((unsigned char) *s)) digits++; s++; } return digits; } static int grab_nist_data (FILE *fp, DATASET *dset, int *zdigits, int polyterms, PRN *prn) { double xx; int i, t, dit; int realvars = dset->v - polyterms; char numstr[64]; if (verbose > 1) { pputs(prn, "\nGetting data...\n\n"); } for (i=1; ivarname[i], "y"); } else if (realvars > 3) { sprintf(dset->varname[i], "x%d", i - 1); } else { strcpy(dset->varname[i], "x"); } if (verbose > 1) { pprintf(prn, "reading variable %d as '%s'\n", i, dset->varname[i]); } } for (t=0; tn; t++) { for (i=1; i zdigits[i]) { zdigits[i] = dit; } } xx = atof(numstr); } dset->Z[i][t] = xx; } } return 0; } static int grab_mp_results (FILE *fp, mp_results *certvals, int nlines, PRN *prn) { int i = 0, lcount = 0, check; char line[MAXLEN]; if (verbose > 1) { pputs(prn, "\nGetting certified values...\n\n"); } for (lcount=0; lcountcoeff[i], &certvals->sderr[i]) == 3) { if (verbose > 1) { pprintf(prn, " B%d: coeff = %.10g, std. error = %.10g\n", check, certvals->coeff[i], certvals->sderr[i]); } i++; } if (na(certvals->sigma) && sscanf(line, " Standard Deviation %lf", &certvals->sigma) == 1) { if (verbose > 1) { pprintf(prn, " sigma = %.10g\n", certvals->sigma); } } if (na(certvals->rsq) && sscanf(line, " R-Squared %lf", &certvals->rsq) == 1) { if (verbose > 1) { pprintf(prn, " R^2 = %.10g\n", certvals->rsq); } } if (na(certvals->fstt) && sscanf(line, "Regression %*d %*f %*f %lf", &certvals->fstt) == 1) { if (verbose > 1) { pprintf(prn, " F = %.10g\n", certvals->fstt); } } if (na(certvals->ess) && sscanf(line, "Residual %*d %lf %*f", &certvals->ess) == 1) { if (verbose > 1) { pprintf(prn, " ESS = %.10g\n", certvals->ess); } } } return 0; } static void get_difficulty_level (const char *line, char *s) { size_t i, len; while (isspace((unsigned char) *line)) line++; strncat(s, line, 47); len = strlen(s); for (i=len-1; i>0; i--) { if (isspace((unsigned char) s[i])) s[i] = 0; else break; } } #define mylog10(x) (log(x) / 2.3025850929940459) static double log_error (double q, double c, PRN *prn) { double le = 0.0; int lae = 0; if (q == c) { le = 15.0; } else if (isinf(c)) { /* certval is inf (e.g. F-stat in some cases): can't really handle this? */ if (na(q) || isinf(q)) { le = 15.0; } else { le = -log(0); } } else if (c == 0.0) { le = -mylog10(fabs(q)); } else { le = -mylog10(fabs(q - c) / fabs(c)); } if (isnan(le)) { pprintf(prn, "q = %g, c = %g\n", q, c); } else { pprintf(prn, "%10.3f %s\n", le, (lae)? "(log abs error)" : ""); } return le; } static int allocate_data_digits (const DATASET *dinfo, int **zdigits) { int *zd; int i; zd = malloc(dinfo->v * sizeof *zd); if (zd == NULL) return 1; for (i=0; iv; i++) { zd[i] = 0; } *zdigits = zd; return 0; } static int read_nist_file (const char *fname, DATASET **pdset, mp_results **pcertvals, int *polyterms, int **zdigits, PRN *prn) { FILE *fp; char *p, line[MAXLEN], difficulty[48]; gchar *fullname; int cstart = 0, cstop = 0; int dstart = 0, dstop = 0; int lcount = 0, nvar = 0, nobs = 0; DATASET *dset = NULL; mp_results *certvals = NULL; int i, t, npoly = 0; #ifdef WIN32 fullname = g_strdup_printf("%s\\%s", datadir, fname); fp = gretl_fopen(fullname, "r"); #else fullname = g_strdup_printf("%s/%s", datadir, fname); fp = gretl_fopen(fullname, "r"); #endif g_free(fullname); if (fp == NULL) { pprintf(prn, "Couldn't open %s\n", fname); return 1; } pprintf(prn, "\n *** %s ***\n", fname); if (verbose) pputc(prn, '\n'); /* allow for generated data: powers of x */ if (strstr(fname, "Pontius")) { npoly = 1; } if (strstr(fname, "Filip")) { npoly = 9; } if (strstr(fname, "Wampler")) { npoly = 4; } if (strstr(fname, "NoInt")) { noint = 1; } else { noint = 0; } *difficulty = 0; while (fgets(line, MAXLEN-1, fp)) { lcount++; /* level of difficulty? */ if (*difficulty == 0 && strstr(line, "Level of Difficulty")) { get_difficulty_level(line, difficulty); if (*difficulty) { pprintf(prn, "(\"%s\")\n", difficulty); } } /* where are the certified results? */ if (cstart == 0 && (p = strstr(line, "Certified")) != NULL) { if (sscanf(p, "Certified Values (lines %d to %d)", &cstart, &cstop) == 2) { ; } } /* where are the data? */ if (dstart == 0 && (p = strstr(line, "Data")) != NULL) { if (sscanf(p, "Data (lines %d to %d)", &dstart, &dstop) == 2) { ; } } /* how many variables are there? */ if (nvar == 0 && (p = strstr(line, "Predictor")) != NULL) { if (sscanf(line, "%d Predictor Variable", &nvar) == 1) { nvar++; if (verbose) pprintf(prn, " Number of variables: %d\n", nvar); } } /* how many observations are there? */ if (nobs == 0 && (p = strstr(line, "Observations")) != NULL) { if (sscanf(line, "%d Observations", &nobs) == 1) { if (verbose) pprintf(prn, " Number of observations: %d\n", nobs); } } /* allocate results struct once we know its size */ if (nvar > 0 && nobs > 0 && certvals == NULL) { int nc = nvar - 1 + npoly; if (!noint) nc++; certvals = mp_results_new(nc); if (certvals == NULL) { fclose(fp); return 1; } } /* allocate data matrix once we know its size */ if (nvar > 0 && nobs > 0 && dset == NULL) { dset = create_auxiliary_dataset(nvar + 1 + npoly, nobs, 0); if (dset == NULL) { free_mp_results(certvals); fclose(fp); return 1; } if (allocate_data_digits(dset, zdigits)) { free_mp_results(certvals); free_datainfo(dset); fclose(fp); return 1; } } /* read the certified results */ if (cstart > 0 && lcount == cstart - 1) { if (certvals == NULL) { pputs(prn, "Results coming but storage is not " "allocated: file is problematic\n"); fclose(fp); return 1; } else { int nlines = cstop - cstart + 1; if (grab_mp_results(fp, certvals, nlines, prn)) { fclose(fp); return 1; } lcount += nlines; } } /* read the data */ if (dstart > 0 && lcount == dstart - 1) { if (dset == NULL || dset->Z == NULL) { pputs(prn, "Data coming but data matrix is not " "allocated: file is problematic\n"); fclose(fp); return 1; } else if (grab_nist_data(fp, dset, *zdigits, npoly, prn)) { fclose(fp); return 1; } } /* end if ready to grab data */ } /* end main fgets loop */ if (verbose > 1) { if (dset != NULL && dset->Z != NULL) { int i, t; for (t=0; tZ[i][t]); pputc(prn, ((i == nvar)? '\n' : ' ')); } } } } if (npoly && verbose) { pputc(prn, '\n'); } for (i=2; i<=npoly+1; i++) { if (verbose) { pprintf(prn, "Generating var %d, 'x^%d' = x ** %d\n", i+1, i, i); } sprintf(dset->varname[i+1], "x^%d", i); for (t=0; tn; t++) { dset->Z[i+1][t] = pow(dset->Z[2][t], i); } } fclose(fp); *pdset = dset; *pcertvals = certvals; *polyterms = npoly; return 0; } static double get_accuracy (MODEL *pmod, mp_results *certvals, PRN *prn) { PRN *vprn; char label[32]; double le, lemin = 32; int i; vprn = (verbose)? prn : NULL; pprintf(vprn, "\nstatistic log relative error\n\n"); for (i=0; incoeff; i++) { sprintf(label, "B[%d]", i); pprintf(vprn, "%-12s", label); le = log_error(pmod->coeff[i], certvals->coeff[i], vprn); if (le < lemin) { lemin = le; } sprintf(label, "Std.Err."); pprintf(vprn, "%-12s", label); le = log_error(pmod->sderr[i], certvals->sderr[i], vprn); if (le < lemin) { lemin = le; } } pprintf(vprn, "%-12s", "sigma"); le = log_error(pmod->sigma, certvals->sigma, vprn); if (le < lemin) { lemin = le; } pprintf(vprn, "%-12s", "ESS"); le = log_error(pmod->ess, certvals->ess, vprn); if (le < lemin) { lemin = le; } pprintf(vprn, "%-12s", "R-squared"); le = log_error(pmod->rsq, certvals->rsq, vprn); if (le < lemin) { lemin = le; } pprintf(vprn, "%-12s", "F-stat"); le = log_error(pmod->fstt, certvals->fstt, vprn); if (le < lemin) { lemin = le; } return lemin; } static void print_nist_summary (int ntests, int missing, int modelerrs, int poorvals, int mpfails, const char *prog, PRN *prn) { pprintf(prn, "\nSummary of NIST linear regression test results:\n" " * number of tests carried out: %d\n" " * reference data files missing or corrupted: %d\n" " * unexpected errors in estimation of models: %d\n" " * poor or unacceptable results with libgretl: %d\n" " * cases where results from the gretl GMP plugin disagreed with the NIST\n" " certified values, at %d significant figures: %d\n", ntests - missing, missing, modelerrs, poorvals, MP_CHECK_DIGITS, mpfails); #ifdef STANDALONE pprintf(prn, "\nYou may run '%s -v' or '%s -vv' for details\n\n", prog, prog); #endif } # ifdef STANDALONE static void *get_mplsq (void); # endif static int run_gretl_mp_comparison (DATASET *dset, mp_results *certvals, int npoly, const int *zdigits, int *mpfails, PRN *prn) { int (*mplsq)(const int *, const int *, const int *, const DATASET *, MODEL *, gretlopt); int *list = NULL, *polylist = NULL; int i, err = 0; int realv = dset->v - npoly; MODEL model; double acc; gretl_model_init(&model, dset); /* create regression list */ list = gretl_list_new(realv); if (list == NULL) return 1; if (noint) { list[0] = realv - 1; for (i=1; i<=list[0]; i++) { list[i] = i; } } else { list[0] = realv; list[1] = 1; list[2] = 0; for (i=3; i<=list[0]; i++) { list[i] = i - 1; } } /* set up list of polynomial terms, if needed */ if (npoly) { polylist = gretl_list_new(npoly); if (polylist == NULL) { free(list); return 1; } for (i=1; i<=npoly; i++) { polylist[i] = i + 1; } } #ifdef STANDALONE mplsq = get_mplsq(); #else mplsq = get_plugin_function("mplsq"); #endif if (mplsq == NULL) { pputs(prn, "Couldn't load mplsq function\n"); err = 1; } if (!err) { err = (*mplsq)(list, polylist, zdigits, dset, &model, OPT_NONE); } free(list); free(polylist); if (verbose) { pprintf(prn, "\nChecking gretl multiple-precision results (%d coefficients):\n\n" "%44s%24s\n\n", certvals->ncoeff, "certified", "libgretl"); for (i=0; incoeff; i++) { char label[24]; if (!na(certvals->coeff[i])) { sprintf(label, "B[%d] estimate", i); pprintf(prn, " %-20s %#24.*g %#24.*g\n", label, MP_PRINT_DIGITS, certvals->coeff[i], MP_PRINT_DIGITS, model.coeff[i]); } if (!na(certvals->sderr[i])) { pprintf(prn, " %-20s %#24.*g %#24.*g\n", "(std. error)", MP_PRINT_DIGITS, certvals->sderr[i], MP_PRINT_DIGITS, model.sderr[i]); } } pputc(prn, '\n'); pprintf(prn, " %-20s %#24.*g %#24.*g\n" " %-20s %#24.*g %#24.*g\n" " %-20s %#24.*g %#24.*g\n" " %-20s %#24.*g %#24.*g\n", "standard error", MP_PRINT_DIGITS, certvals->sigma, MP_PRINT_DIGITS, model.sigma, "error sum of squares", MP_PRINT_DIGITS, certvals->ess, MP_PRINT_DIGITS, model.ess, "R-squared", MP_PRINT_DIGITS, certvals->rsq, MP_PRINT_DIGITS, model.rsq, "F", MP_PRINT_DIGITS, certvals->fstt, MP_PRINT_DIGITS, model.fstt); } acc = get_accuracy(&model, certvals, prn); if (verbose) { pputc(prn, '\n'); } if (acc < 12.0) { *mpfails += 1; pprintf(prn, "* Using gretl GMP plugin: errors found when using" " %d significant figures\n (worst-case log relative error = %.3f)\n", MP_CHECK_DIGITS, acc); } else { pprintf(prn, "* Using gretl GMP plugin: results correct to" " at least %d digits\n", (int) acc); pprintf(prn, " (worst-case log relative error = %.3f)\n", acc); } clear_model(&model); return err; } static int run_gretl_comparison (const char *datname, DATASET *dset, mp_results *certvals, int *errs, int *poor, PRN *prn) { int *list = NULL; MODEL *model = NULL; double acc; int wc = 0; int i; static int modelnum; list = gretl_list_new(dset->v); if (list == NULL) return 1; if (noint) { list[0] = dset->v - 1; for (i=1; i<=list[0]; i++) { list[i] = i; } } else { list[0] = dset->v; list[1] = 1; list[2] = 0; for (i=3; i<=list[0]; i++) { list[i] = i - 1; } } model = gretl_model_new(); *model = lsq(list, dset, OLS, OPT_Z); if (model->errcode) { if (verbose) { pputc(prn, '\n'); } pprintf(prn, "gretl error code: %d\n", model->errcode); errmsg(model->errcode, prn); if (strcmp(datname, "Filip.dat") == 0 && model->errcode == E_SINGULAR) { pputs(prn, "(This error was expected with standard libgretl)\n"); } else { *errs += 1; } goto free_stuff; } if (verbose) { int i; model->ID = ++modelnum; printmodel(model, dset, OPT_NONE, prn); for (i=0; incoeff; i++) { pprintf(prn, " gretl coefficient[%d] = %#.10g\n", i, model->coeff[i]); } } /* special treatment when there's no intercept */ if (noint) { double xx = 0.0; int t; for (t=0; tn; t++) { xx += dset->Z[1][t] * dset->Z[1][t]; } model->rsq = 1.0 - model->ess / xx; } acc = get_accuracy(model, certvals, prn); if (verbose) { pputs(prn, "\n ***"); } if (acc >= 6.0) { pprintf(prn, "* %s results correct to at least %d digits\n", LIBGRETLSTR, (int) acc); } else if (acc >= MIN_DIGITS) { if (strcmp(datname, "Filip.dat") && strcmp(datname, "Wampler5.dat")) { pprintf(prn, "* %s results correct to only %d digits: " "POOR\n", LIBGRETLSTR, (int) acc); *poor += 1; } else { pprintf(prn, "* %s results correct to at least %.2f digits\n" " (OK on Filip.dat and Wampler5.dat)\n", LIBGRETLSTR, acc); wc = 1; } } else { pprintf(prn, "* %s results correct to less than " "%d digits: UNACCEPTABLE\n", LIBGRETLSTR, MIN_DIGITS); *poor += 1; } if (!wc) { pprintf(prn, " (worst-case log relative error = %.3f)\n", acc); } if (verbose) { pputc(prn, '\n'); } free_stuff: free(list); gretl_model_free(model); return 0; } #ifdef STANDALONE int main (int argc, char *argv[]) { int j; PRN *prn; DATASET *dataset; mp_results *certvals = NULL; int ntests, missing = 0, modelerrs = 0, poorvals = 0; int polyterms = 0, mpfails = 0; int *zdigits = NULL; const char *prog; # ifdef LONGLEY_ONLY const char *nist_files[] = { "Longley.dat" }; # else const char *nist_files[] = { "Norris.dat", "Pontius.dat", "NoInt1.dat", "NoInt2.dat", "Filip.dat", "Longley.dat", "Wampler1.dat", "Wampler2.dat", "Wampler3.dat", "Wampler4.dat", "Wampler5.dat" }; # endif /* LONGLEY_ONLY */ ntests = sizeof nist_files / sizeof *nist_files; prog = argv[0]; if (argc >= 2 && strcmp(argv[1], "-v") == 0) verbose = 1; if (argc >= 2 && strcmp(argv[1], "-vv") == 0) verbose = 2; strcpy(datadir, "."); if (argc == 2 && argv[1][0] != '-') { strcpy(datadir, argv[1]); } else if (argc == 3) { strcpy(datadir, argv[2]); } libgretl_init(); prn = gretl_print_new(GRETL_PRINT_STDOUT, NULL); for (j=0; j # else # include # endif static void *get_mplsq (void) { void *handle; void *funp; # ifdef _WIN32 handle = LoadLibrary("plugins\\mp_ols.dll"); if (handle == NULL) return NULL; # else handle = dlopen("../plugin/.libs/mp_ols.so", RTLD_LAZY); if (handle == NULL) { fputs(dlerror(), stderr); return NULL; } # endif /* _WIN32 */ # ifdef _WIN32 funp = GetProcAddress(handle, "mplsq"); # else funp = dlsym(handle, "mplsq"); if (funp == NULL) { /* try munged version */ funp = dlsym(handle, "_mplsq"); if (funp == NULL) { fputs(dlerror(), stderr); } } # endif /* _WIN32 */ if (funp == NULL) { close_plugin(handle); } return funp; } #endif /* STANDALONE */