1 /* gretl - The Gnu Regression, Econometrics and Time-series Library
2  * Copyright (C) 1999-2002 Allin Cottrell
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This software is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this software; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 02111-1307, USA.
18  */
19 
20 /* nistcheck -- program to check libgretl estimation functions against
21    the NIST reference datasets for linear regression. */
22 
23 #include "libgretl.h"
24 #include "version.h"
25 
26 #include <string.h>
27 #include <float.h>
28 
29 #ifdef LONGLEY_ONLY
30 # define MAX_DIGITS 12
31 #else
32 # define MAX_DIGITS 12
33 #endif
34 
35 #define MIN_DIGITS 4
36 #define MP_CHECK_DIGITS 12
37 #define MP_PRINT_DIGITS 15
38 
39 #define LIBGRETLSTR "Standard libgretl:"
40 
41 static int verbose;
42 static int noint;
43 
44 static char datadir[FILENAME_MAX];
45 
46 typedef struct mp_results_ mp_results;
47 
48 struct mp_results_ {
49     int ncoeff;
50     double *coeff;
51     double *sderr;
52     double sigma;
53     double ess;
54     double rsq;
55     double fstt;
56 };
57 
free_mp_results(mp_results * mpvals)58 void free_mp_results (mp_results *mpvals)
59 {
60     if (mpvals != NULL) {
61 	free(mpvals->coeff);
62 	free(mpvals->sderr);
63 	free(mpvals);
64     }
65 }
66 
mp_results_new(int nc)67 mp_results *mp_results_new (int nc)
68 {
69     mp_results *mpvals;
70     int i;
71 
72     mpvals = malloc(sizeof *mpvals);
73     if (mpvals == NULL) {
74 	return NULL;
75     }
76 
77     mpvals->coeff = NULL;
78     mpvals->sderr = NULL;
79 
80     mpvals->ncoeff = nc;
81 
82     mpvals->coeff = malloc(nc * sizeof *mpvals->coeff);
83     mpvals->sderr = malloc(nc * sizeof *mpvals->sderr);
84 
85     if (mpvals->coeff == NULL ||
86 	mpvals->sderr == NULL) {
87 	free_mp_results(mpvals);
88 	return NULL;
89     }
90 
91     for (i=0; i<nc; i++) {
92 	mpvals->coeff[i] = NADBL;
93 	mpvals->sderr[i] = NADBL;
94     }
95 
96     mpvals->sigma = mpvals->ess = NADBL;
97     mpvals->rsq = mpvals->fstt = NADBL;
98 
99     return mpvals;
100 }
101 
102 /* special stuff:
103 
104 Noint1, NoInt2: no intercept, use alternative R^2 calculation
105 Filip: create powers of x, 2 through 10
106 Wampler1 - Wampler5: create powers of x, 2 to 5
107 
108 */
109 
get_data_digits(const char * s)110 static int get_data_digits (const char *s)
111 {
112     int digits = 0;
113 
114     while (*s == '-' || *s == '.' || *s == ',' ||
115 	   isdigit((unsigned char) *s)) {
116 	if (isdigit((unsigned char) *s)) digits++;
117 	s++;
118     }
119 
120     return digits;
121 }
122 
grab_nist_data(FILE * fp,DATASET * dset,int * zdigits,int polyterms,PRN * prn)123 static int grab_nist_data (FILE *fp, DATASET *dset,
124 			   int *zdigits, int polyterms,
125 			   PRN *prn)
126 {
127     double xx;
128     int i, t, dit;
129     int realvars = dset->v - polyterms;
130     char numstr[64];
131 
132     if (verbose > 1) {
133 	pputs(prn, "\nGetting data...\n\n");
134     }
135 
136     for (i=1; i<realvars; i++) {
137 	if (i == 1) {
138 	    strcpy(dset->varname[i], "y");
139 	} else if (realvars > 3) {
140 	    sprintf(dset->varname[i], "x%d", i - 1);
141 	} else {
142 	    strcpy(dset->varname[i], "x");
143 	}
144 	if (verbose > 1) {
145 	    pprintf(prn, "reading variable %d as '%s'\n",
146 		    i, dset->varname[i]);
147 	}
148     }
149 
150     for (t=0; t<dset->n; t++) {
151 	for (i=1; i<realvars; i++) {
152 	    if (fscanf(fp, "%s", numstr) != 1) {
153 		pputs(prn, "Data ended prematurely\n");
154 		return 1;
155 	    } else {
156 		if (zdigits != NULL) {
157 		    dit = get_data_digits(numstr);
158 		    if (dit > zdigits[i]) {
159 			zdigits[i] = dit;
160 		    }
161 		}
162 		xx = atof(numstr);
163 	    }
164 	    dset->Z[i][t] = xx;
165 	}
166     }
167 
168     return 0;
169 }
170 
grab_mp_results(FILE * fp,mp_results * certvals,int nlines,PRN * prn)171 static int grab_mp_results (FILE *fp, mp_results *certvals,
172 			    int nlines, PRN *prn)
173 {
174     int i = 0, lcount = 0, check;
175     char line[MAXLEN];
176 
177     if (verbose > 1) {
178 	pputs(prn, "\nGetting certified values...\n\n");
179     }
180 
181     for (lcount=0; lcount<nlines; lcount++) {
182 
183 	if (fgets(line, MAXLEN-1, fp) == NULL) {
184 	    pputs(prn, "Results ended prematurely\n");
185 	    return 1;
186 	}
187 
188 	if (sscanf(line, " B%d %lf %lf", &check, &certvals->coeff[i],
189 		   &certvals->sderr[i]) == 3) {
190 	    if (verbose > 1) {
191 		pprintf(prn, " B%d: coeff = %.10g, std. error = %.10g\n",
192 			check, certvals->coeff[i], certvals->sderr[i]);
193 	    }
194 	    i++;
195 	}
196 
197 	if (na(certvals->sigma) &&
198 	    sscanf(line, " Standard Deviation %lf", &certvals->sigma) == 1) {
199 	    if (verbose > 1) {
200 		pprintf(prn, " sigma = %.10g\n", certvals->sigma);
201 	    }
202 	}
203 
204 	if (na(certvals->rsq) &&
205 	    sscanf(line, " R-Squared %lf", &certvals->rsq) == 1) {
206 	    if (verbose > 1) {
207 		pprintf(prn, " R^2 = %.10g\n", certvals->rsq);
208 	    }
209 	}
210 
211 	if (na(certvals->fstt) &&
212 	    sscanf(line, "Regression %*d %*f %*f %lf", &certvals->fstt) == 1) {
213 	    if (verbose > 1) {
214 		pprintf(prn, " F = %.10g\n", certvals->fstt);
215 	    }
216 	}
217 
218 	if (na(certvals->ess) &&
219 	    sscanf(line, "Residual %*d %lf %*f", &certvals->ess) == 1) {
220 	    if (verbose > 1) {
221 		pprintf(prn, " ESS = %.10g\n", certvals->ess);
222 	    }
223 	}
224 
225     }
226 
227     return 0;
228 }
229 
230 static
get_difficulty_level(const char * line,char * s)231 void get_difficulty_level (const char *line, char *s)
232 {
233     size_t i, len;
234 
235     while (isspace((unsigned char) *line)) line++;
236 
237     strncat(s, line, 47);
238     len = strlen(s);
239 
240     for (i=len-1; i>0; i--) {
241 	if (isspace((unsigned char) s[i])) s[i] = 0;
242 	else break;
243     }
244 }
245 
246 #define mylog10(x) (log(x) / 2.3025850929940459)
247 
log_error(double q,double c,PRN * prn)248 static double log_error (double q, double c, PRN *prn)
249 {
250     double le = 0.0;
251     int lae = 0;
252 
253     if (q == c) {
254 	le = 15.0;
255     } else if (isinf(c)) {
256 	/* certval is inf (e.g. F-stat in some cases):
257 	   can't really handle this? */
258 	if (na(q) || isinf(q)) {
259 	    le = 15.0;
260 	} else {
261 	    le = -log(0);
262 	}
263     } else if (c == 0.0) {
264 	le = -mylog10(fabs(q));
265     } else {
266 	le = -mylog10(fabs(q - c) / fabs(c));
267     }
268 
269     if (isnan(le)) {
270 	pprintf(prn, "q = %g, c = %g\n", q, c);
271     } else {
272 	pprintf(prn, "%10.3f %s\n", le, (lae)? "(log abs error)" : "");
273     }
274 
275     return le;
276 }
277 
allocate_data_digits(const DATASET * dinfo,int ** zdigits)278 static int allocate_data_digits (const DATASET *dinfo, int **zdigits)
279 {
280     int *zd;
281     int i;
282 
283     zd = malloc(dinfo->v * sizeof *zd);
284     if (zd == NULL) return 1;
285 
286     for (i=0; i<dinfo->v; i++) {
287 	zd[i] = 0;
288     }
289 
290     *zdigits = zd;
291 
292     return 0;
293 }
294 
read_nist_file(const char * fname,DATASET ** pdset,mp_results ** pcertvals,int * polyterms,int ** zdigits,PRN * prn)295 static int read_nist_file (const char *fname,
296 			   DATASET **pdset,
297 			   mp_results **pcertvals,
298 			   int *polyterms,
299 			   int **zdigits,
300 			   PRN *prn)
301 {
302     FILE *fp;
303     char *p, line[MAXLEN], difficulty[48];
304     gchar *fullname;
305     int cstart = 0, cstop = 0;
306     int dstart = 0, dstop = 0;
307     int lcount = 0, nvar = 0, nobs = 0;
308     DATASET *dset = NULL;
309     mp_results *certvals = NULL;
310     int i, t, npoly = 0;
311 
312 #ifdef WIN32
313     fullname = g_strdup_printf("%s\\%s", datadir, fname);
314     fp = gretl_fopen(fullname, "r");
315 #else
316     fullname = g_strdup_printf("%s/%s", datadir, fname);
317     fp = gretl_fopen(fullname, "r");
318 #endif
319     g_free(fullname);
320 
321     if (fp == NULL) {
322 	pprintf(prn, "Couldn't open %s\n", fname);
323 	return 1;
324     }
325 
326     pprintf(prn, "\n *** %s ***\n", fname);
327     if (verbose) pputc(prn, '\n');
328 
329     /* allow for generated data: powers of x */
330     if (strstr(fname, "Pontius")) {
331 	npoly = 1;
332     }
333     if (strstr(fname, "Filip")) {
334 	npoly = 9;
335     }
336     if (strstr(fname, "Wampler")) {
337 	npoly = 4;
338     }
339 
340     if (strstr(fname, "NoInt")) {
341 	noint = 1;
342     } else {
343 	noint = 0;
344     }
345 
346     *difficulty = 0;
347 
348     while (fgets(line, MAXLEN-1, fp)) {
349 
350 	lcount++;
351 
352 	/* level of difficulty? */
353 	if (*difficulty == 0 && strstr(line, "Level of Difficulty")) {
354 	    get_difficulty_level(line, difficulty);
355 	    if (*difficulty) {
356 		pprintf(prn, "(\"%s\")\n", difficulty);
357 	    }
358 	}
359 
360 	/* where are the certified results? */
361 	if (cstart == 0 && (p = strstr(line, "Certified")) != NULL) {
362 	    if (sscanf(p, "Certified Values (lines %d to %d)",
363 		       &cstart, &cstop) == 2) {
364 		;
365 	    }
366 	}
367 
368 	/* where are the data? */
369 	if (dstart == 0 && (p = strstr(line, "Data")) != NULL) {
370 	    if (sscanf(p, "Data (lines %d to %d)",
371 		       &dstart, &dstop) == 2) {
372 		;
373 	    }
374 	}
375 
376 	/* how many variables are there? */
377 	if (nvar == 0 && (p = strstr(line, "Predictor")) != NULL) {
378 	    if (sscanf(line, "%d Predictor Variable", &nvar) == 1) {
379 		nvar++;
380 		if (verbose) pprintf(prn, " Number of variables: %d\n", nvar);
381 	    }
382 	}
383 
384 	/* how many observations are there? */
385 	if (nobs == 0 && (p = strstr(line, "Observations")) != NULL) {
386 	    if (sscanf(line, "%d Observations", &nobs) == 1) {
387 		if (verbose) pprintf(prn, " Number of observations: %d\n", nobs);
388 	    }
389 	}
390 
391 	/* allocate results struct once we know its size */
392 	if (nvar > 0 && nobs > 0 && certvals == NULL) {
393 	    int nc = nvar - 1 + npoly;
394 
395 	    if (!noint) nc++;
396 	    certvals = mp_results_new(nc);
397 	    if (certvals == NULL) {
398 		fclose(fp);
399 		return 1;
400 	    }
401 	}
402 
403 	/* allocate data matrix once we know its size */
404 	if (nvar > 0 && nobs > 0 && dset == NULL) {
405 
406 	    dset = create_auxiliary_dataset(nvar + 1 + npoly,
407 					    nobs, 0);
408 	    if (dset == NULL) {
409 		free_mp_results(certvals);
410 		fclose(fp);
411 		return 1;
412 	    }
413 
414 	    if (allocate_data_digits(dset, zdigits)) {
415 		free_mp_results(certvals);
416 		free_datainfo(dset);
417 		fclose(fp);
418 		return 1;
419 	    }
420 	}
421 
422 	/* read the certified results */
423 	if (cstart > 0 && lcount == cstart - 1) {
424 	    if (certvals == NULL) {
425 		pputs(prn, "Results coming but storage is not "
426 		      "allocated: file is problematic\n");
427 		fclose(fp);
428 		return 1;
429 	    } else {
430 		int nlines = cstop - cstart + 1;
431 
432 		if (grab_mp_results(fp, certvals, nlines, prn)) {
433 		    fclose(fp);
434 		    return 1;
435 		}
436 		lcount += nlines;
437 	    }
438 	}
439 
440 	/* read the data */
441 	if (dstart > 0 && lcount == dstart - 1) {
442 	    if (dset == NULL || dset->Z == NULL) {
443 		pputs(prn, "Data coming but data matrix is not "
444 		      "allocated: file is problematic\n");
445 		fclose(fp);
446 		return 1;
447 	    } else if (grab_nist_data(fp, dset, *zdigits, npoly, prn)) {
448 		fclose(fp);
449 		return 1;
450 	    }
451 	} /* end if ready to grab data */
452 
453     } /* end main fgets loop */
454 
455     if (verbose > 1) {
456 	if (dset != NULL && dset->Z != NULL) {
457 	    int i, t;
458 
459 	    for (t=0; t<nobs; t++) {
460 		for (i=1; i<=nvar; i++) {
461 		    pprintf(prn, "%#.20g", dset->Z[i][t]);
462 		    pputc(prn, ((i == nvar)? '\n' : ' '));
463 		}
464 	    }
465 	}
466     }
467 
468     if (npoly && verbose) {
469 	pputc(prn, '\n');
470     }
471 
472     for (i=2; i<=npoly+1; i++) {
473 	if (verbose) {
474 	    pprintf(prn, "Generating var %d, 'x^%d' = x ** %d\n", i+1, i, i);
475 	}
476 	sprintf(dset->varname[i+1], "x^%d", i);
477 	for (t=0; t<dset->n; t++) {
478 	    dset->Z[i+1][t] = pow(dset->Z[2][t], i);
479 	}
480     }
481 
482     fclose(fp);
483 
484     *pdset = dset;
485     *pcertvals = certvals;
486     *polyterms = npoly;
487 
488     return 0;
489 }
490 
491 static
get_accuracy(MODEL * pmod,mp_results * certvals,PRN * prn)492 double get_accuracy (MODEL *pmod, mp_results *certvals, PRN *prn)
493 {
494     PRN *vprn;
495     char label[32];
496     double le, lemin = 32;
497     int i;
498 
499     vprn = (verbose)? prn : NULL;
500 
501     pprintf(vprn, "\nstatistic   log relative error\n\n");
502 
503     for (i=0; i<pmod->ncoeff; i++) {
504 	sprintf(label, "B[%d]", i);
505 	pprintf(vprn, "%-12s", label);
506 	le = log_error(pmod->coeff[i], certvals->coeff[i], vprn);
507 	if (le < lemin) {
508 	    lemin = le;
509 	}
510 	sprintf(label, "Std.Err.");
511 	pprintf(vprn, "%-12s", label);
512 	le = log_error(pmod->sderr[i], certvals->sderr[i], vprn);
513 	if (le < lemin) {
514 	    lemin = le;
515 	}
516     }
517 
518     pprintf(vprn, "%-12s", "sigma");
519     le = log_error(pmod->sigma, certvals->sigma, vprn);
520     if (le < lemin) {
521 	lemin = le;
522     }
523 
524 
525     pprintf(vprn, "%-12s", "ESS");
526     le = log_error(pmod->ess, certvals->ess, vprn);
527     if (le < lemin) {
528 	lemin = le;
529     }
530 
531     pprintf(vprn, "%-12s", "R-squared");
532     le = log_error(pmod->rsq, certvals->rsq, vprn);
533     if (le < lemin) {
534 	lemin = le;
535     }
536 
537     pprintf(vprn, "%-12s", "F-stat");
538     le = log_error(pmod->fstt, certvals->fstt, vprn);
539     if (le < lemin) {
540 	lemin = le;
541     }
542 
543     return lemin;
544 }
545 
546 static
print_nist_summary(int ntests,int missing,int modelerrs,int poorvals,int mpfails,const char * prog,PRN * prn)547 void print_nist_summary (int ntests, int missing, int modelerrs,
548 			 int poorvals, int mpfails,
549 			 const char *prog, PRN *prn)
550 {
551     pprintf(prn, "\nSummary of NIST linear regression test results:\n"
552 	    " * number of tests carried out: %d\n"
553 	    " * reference data files missing or corrupted: %d\n"
554 	    " * unexpected errors in estimation of models: %d\n"
555 	    " * poor or unacceptable results with libgretl: %d\n"
556 	    " * cases where results from the gretl GMP plugin disagreed with the NIST\n"
557 	    "   certified values, at %d significant figures: %d\n",
558 	    ntests - missing, missing, modelerrs, poorvals,
559 	    MP_CHECK_DIGITS, mpfails);
560 
561 #ifdef STANDALONE
562     pprintf(prn, "\nYou may run '%s -v' or '%s -vv' for details\n\n",
563 	    prog, prog);
564 #endif
565 }
566 
567 # ifdef STANDALONE
568 static void *get_mplsq (void);
569 # endif
570 
571 static
run_gretl_mp_comparison(DATASET * dset,mp_results * certvals,int npoly,const int * zdigits,int * mpfails,PRN * prn)572 int run_gretl_mp_comparison (DATASET *dset,
573 			     mp_results *certvals, int npoly,
574 			     const int *zdigits, int *mpfails,
575 			     PRN *prn)
576 {
577     int (*mplsq)(const int *, const int *, const int *,
578 		 const DATASET *, MODEL *, gretlopt);
579     int *list = NULL, *polylist = NULL;
580     int i, err = 0;
581     int realv = dset->v - npoly;
582     MODEL model;
583     double acc;
584 
585     gretl_model_init(&model, dset);
586 
587     /* create regression list */
588     list = gretl_list_new(realv);
589     if (list == NULL) return 1;
590 
591     if (noint) {
592 	list[0] = realv - 1;
593 	for (i=1; i<=list[0]; i++) {
594 	    list[i] = i;
595 	}
596     } else {
597 	list[0] = realv;
598 	list[1] = 1;
599 	list[2] = 0;
600 	for (i=3; i<=list[0]; i++) {
601 	    list[i] = i - 1;
602 	}
603     }
604 
605     /* set up list of polynomial terms, if needed */
606     if (npoly) {
607 	polylist = gretl_list_new(npoly);
608 	if (polylist == NULL) {
609 	    free(list);
610 	    return 1;
611 	}
612 	for (i=1; i<=npoly; i++) {
613 	    polylist[i] = i + 1;
614 	}
615     }
616 
617 #ifdef STANDALONE
618     mplsq = get_mplsq();
619 #else
620     mplsq = get_plugin_function("mplsq");
621 #endif
622     if (mplsq == NULL) {
623         pputs(prn, "Couldn't load mplsq function\n");
624         err = 1;
625     }
626 
627     if (!err) {
628         err = (*mplsq)(list, polylist, zdigits, dset,
629 		       &model, OPT_NONE);
630     }
631 
632     free(list);
633     free(polylist);
634 
635     if (verbose) {
636 	pprintf(prn, "\nChecking gretl multiple-precision results (%d coefficients):\n\n"
637 		"%44s%24s\n\n", certvals->ncoeff, "certified", "libgretl");
638 
639 	for (i=0; i<certvals->ncoeff; i++) {
640 	    char label[24];
641 
642 	    if (!na(certvals->coeff[i])) {
643 		sprintf(label, "B[%d] estimate", i);
644 		pprintf(prn, " %-20s %#24.*g %#24.*g\n",
645 			label,
646 			MP_PRINT_DIGITS, certvals->coeff[i],
647 			MP_PRINT_DIGITS, model.coeff[i]);
648 	    }
649 
650 	    if (!na(certvals->sderr[i])) {
651 		pprintf(prn, " %-20s %#24.*g %#24.*g\n",
652 			"(std. error)",
653 			MP_PRINT_DIGITS, certvals->sderr[i],
654 			MP_PRINT_DIGITS, model.sderr[i]);
655 	    }
656 	}
657 
658 	pputc(prn, '\n');
659 
660 	pprintf(prn, " %-20s %#24.*g %#24.*g\n"
661 		" %-20s %#24.*g %#24.*g\n"
662 		" %-20s %#24.*g %#24.*g\n"
663 		" %-20s %#24.*g %#24.*g\n",
664 		"standard error",
665 		MP_PRINT_DIGITS, certvals->sigma,
666 		MP_PRINT_DIGITS, model.sigma,
667 		"error sum of squares",
668 		MP_PRINT_DIGITS, certvals->ess,
669 		MP_PRINT_DIGITS, model.ess,
670 		"R-squared",
671 		MP_PRINT_DIGITS, certvals->rsq,
672 		MP_PRINT_DIGITS, model.rsq,
673 		"F",
674 		MP_PRINT_DIGITS, certvals->fstt,
675 		MP_PRINT_DIGITS, model.fstt);
676     }
677 
678     acc = get_accuracy(&model, certvals, prn);
679 
680     if (verbose) {
681 	pputc(prn, '\n');
682     }
683 
684     if (acc < 12.0) {
685 	*mpfails += 1;
686 	pprintf(prn, "* Using gretl GMP plugin: errors found when using"
687 		" %d significant figures\n  (worst-case log relative error = %.3f)\n",
688 		MP_CHECK_DIGITS, acc);
689     } else {
690 	pprintf(prn, "* Using gretl GMP plugin: results correct to"
691 		" at least %d digits\n", (int) acc);
692 	pprintf(prn, "  (worst-case log relative error = %.3f)\n", acc);
693     }
694 
695     clear_model(&model);
696 
697     return err;
698 }
699 
700 static
run_gretl_comparison(const char * datname,DATASET * dset,mp_results * certvals,int * errs,int * poor,PRN * prn)701 int run_gretl_comparison (const char *datname,
702 			  DATASET *dset,
703 			  mp_results *certvals,
704 			  int *errs, int *poor,
705 			  PRN *prn)
706 {
707     int *list = NULL;
708     MODEL *model = NULL;
709     double acc;
710     int wc = 0;
711     int i;
712     static int modelnum;
713 
714     list = gretl_list_new(dset->v);
715     if (list == NULL) return 1;
716 
717     if (noint) {
718 	list[0] = dset->v - 1;
719 	for (i=1; i<=list[0]; i++) {
720 	    list[i] = i;
721 	}
722     } else {
723 	list[0] = dset->v;
724 	list[1] = 1;
725 	list[2] = 0;
726 	for (i=3; i<=list[0]; i++) {
727 	    list[i] = i - 1;
728 	}
729     }
730 
731     model = gretl_model_new();
732 
733     *model = lsq(list, dset, OLS, OPT_Z);
734 
735     if (model->errcode) {
736 	if (verbose) {
737 	    pputc(prn, '\n');
738 	}
739 	pprintf(prn, "gretl error code: %d\n", model->errcode);
740 	errmsg(model->errcode, prn);
741 
742 	if (strcmp(datname, "Filip.dat") == 0 &&
743 	    model->errcode == E_SINGULAR) {
744 	    pputs(prn, "(This error was expected with standard libgretl)\n");
745 	} else {
746 	    *errs += 1;
747 	}
748 
749 	goto free_stuff;
750     }
751 
752     if (verbose) {
753 	int i;
754 
755 	model->ID = ++modelnum;
756 	printmodel(model, dset, OPT_NONE, prn);
757 
758 	for (i=0; i<model->ncoeff; i++) {
759 	    pprintf(prn, " gretl coefficient[%d] = %#.10g\n", i,
760 		    model->coeff[i]);
761 	}
762     }
763 
764     /* special treatment when there's no intercept */
765     if (noint) {
766 	double xx = 0.0;
767 	int t;
768 
769 	for (t=0; t<dset->n; t++) {
770 	    xx += dset->Z[1][t] *  dset->Z[1][t];
771 	}
772 	model->rsq = 1.0 - model->ess / xx;
773     }
774 
775     acc = get_accuracy(model, certvals, prn);
776 
777     if (verbose) {
778 	pputs(prn, "\n ***");
779     }
780 
781     if (acc >= 6.0) {
782 	pprintf(prn, "* %s results correct to at least %d digits\n",
783 		LIBGRETLSTR, (int) acc);
784     } else if (acc >= MIN_DIGITS) {
785 	if (strcmp(datname, "Filip.dat") && strcmp(datname, "Wampler5.dat")) {
786 	    pprintf(prn, "* %s results correct to only %d digits: "
787 		    "POOR\n", LIBGRETLSTR, (int) acc);
788 	    *poor += 1;
789 	} else {
790 	    pprintf(prn, "* %s results correct to at least %.2f digits\n"
791 		    "  (OK on Filip.dat and Wampler5.dat)\n", LIBGRETLSTR, acc);
792 	    wc = 1;
793 	}
794     } else {
795 	pprintf(prn, "* %s results correct to less than "
796 		"%d digits: UNACCEPTABLE\n", LIBGRETLSTR, MIN_DIGITS);
797 	*poor += 1;
798     }
799 
800     if (!wc) {
801 	pprintf(prn, "  (worst-case log relative error = %.3f)\n", acc);
802     }
803 
804     if (verbose) {
805 	pputc(prn, '\n');
806     }
807 
808  free_stuff:
809 
810     free(list);
811     gretl_model_free(model);
812 
813     return 0;
814 }
815 
816 #ifdef STANDALONE
817 
main(int argc,char * argv[])818 int main (int argc, char *argv[])
819 {
820     int j;
821     PRN *prn;
822     DATASET *dataset;
823     mp_results *certvals = NULL;
824     int ntests, missing = 0, modelerrs = 0, poorvals = 0;
825     int polyterms = 0, mpfails = 0;
826     int *zdigits = NULL;
827     const char *prog;
828 
829 # ifdef LONGLEY_ONLY
830     const char *nist_files[] = {
831 	"Longley.dat"
832     };
833 # else
834     const char *nist_files[] = {
835 	"Norris.dat",
836 	"Pontius.dat",
837 	"NoInt1.dat",
838 	"NoInt2.dat",
839 	"Filip.dat",
840 	"Longley.dat",
841 	"Wampler1.dat",
842 	"Wampler2.dat",
843 	"Wampler3.dat",
844 	"Wampler4.dat",
845 	"Wampler5.dat"
846     };
847 # endif /* LONGLEY_ONLY */
848 
849     ntests = sizeof nist_files / sizeof *nist_files;
850 
851     prog = argv[0];
852 
853     if (argc >= 2 && strcmp(argv[1], "-v") == 0) verbose = 1;
854     if (argc >= 2 && strcmp(argv[1], "-vv") == 0) verbose = 2;
855 
856     strcpy(datadir, ".");
857     if (argc == 2 && argv[1][0] != '-') {
858 	strcpy(datadir, argv[1]);
859     } else if (argc == 3) {
860 	strcpy(datadir, argv[2]);
861     }
862 
863     libgretl_init();
864 
865     prn = gretl_print_new(GRETL_PRINT_STDOUT, NULL);
866 
867     for (j=0; j<ntests; j++) {
868 	if (read_nist_file(nist_files[j], &dataset, &certvals,
869 			   &polyterms, &zdigits, prn)) {
870 	    pprintf(prn, "Error processing %s\n", nist_files[j]);
871 	    missing++;
872 
873 	} else {
874 	    run_gretl_comparison (nist_files[j], dataset, certvals,
875 				  &modelerrs, &poorvals, prn);
876 
877 	    run_gretl_mp_comparison (dataset, certvals, polyterms,
878 				     zdigits, &mpfails, prn);
879 
880 	    free_mp_results(certvals);
881 	    certvals = NULL;
882 	    destroy_dataset(dataset);
883 	    dataset = NULL;
884 	    free(zdigits);
885 	    zdigits = NULL;
886 	}
887     }
888 
889     print_nist_summary(ntests, missing, modelerrs, poorvals, mpfails,
890 		       prog, prn);
891 
892     gretl_print_destroy(prn);
893 
894     libgretl_cleanup();
895 
896     return (missing || modelerrs || poorvals);
897 }
898 
899 #else /* !STANDALONE */
900 
nist_intro(PRN * prn)901 static void nist_intro (PRN *prn)
902 {
903     pputs(prn, "What you should see below: A series of 11 tests, using the "
904 	  "reference data sets for linear regression from the U.S. National "
905 	  "Institute of Standards and Technology (NIST). If you scroll to "
906 	  "the bottom you will see a summary of the results: if all is well "
907 	  "there should be 0 values for \"data files missing\", \"unexpected "
908 	  "errors\" and \"poor results\".\n\n");
909 
910     pputs(prn, "The \"log relative error\" is defined as the negative of the "
911 	  "base-10 logarithm of (|q - c| / |c|), where q denotes the result "
912 	  "produced by gretl and c denotes the certified value.  It represents "
913 	  "the number of correct digits in the gretl output.  For more details, "
914 	  "see B. D. McCullough, \"Assessing the Reliability of "
915 	  "Statistical Software: Part I\", The American Statistician, 52 "
916 	  "(1998), pp. 358-366.\n\n");
917 
918     pputs(prn, "Each test case is run twice, once using the standard "
919 	  "linear regression calculation in the gretl library and once "
920 	  "in multiple precision arithmetic using the GMP library.\n\n");
921 
922     pputs(prn, "For more information, please see\n"
923 	  "http://www.itl.nist.gov/div898/strd/general/main.html");
924 
925     pputs(prn, "\n\n");
926 }
927 
run_nist_tests(const char * datapath,const char * outfile,int verbosity)928 int run_nist_tests (const char *datapath, const char *outfile, int verbosity)
929 {
930     int j;
931     PRN *prn;
932     DATASET *dataset;
933     mp_results *certvals = NULL;
934     int *zdigits = NULL;
935     int ntests, missing = 0, modelerrs = 0, poorvals = 0;
936     int polyterms = 0, mpfails = 0;
937     const char *nist_files[] = {
938 	"Norris.dat",
939 	"Pontius.dat",
940 	"NoInt1.dat",
941 	"NoInt2.dat",
942 	"Filip.dat",
943 	"Longley.dat",
944 	"Wampler1.dat",
945 	"Wampler2.dat",
946 	"Wampler3.dat",
947 	"Wampler4.dat",
948 	"Wampler5.dat"
949     };
950     int err = 0;
951 
952     gretl_push_c_numeric_locale();
953 
954     ntests = sizeof nist_files / sizeof *nist_files;
955 
956     verbose = verbosity;
957 
958     sprintf(datadir, "%snist", datapath);
959 
960     prn = gretl_print_new_with_filename(outfile, &err);
961 
962     nist_intro(prn);
963 
964     for (j=0; j<ntests; j++) {
965 	if (read_nist_file(nist_files[j], &dataset, &certvals,
966 			   &polyterms, &zdigits, prn)) {
967 	    pprintf(prn, "Error processing %s\n", nist_files[j]);
968 	    missing++;
969 
970 	} else {
971 	    run_gretl_comparison (nist_files[j], dataset, certvals,
972 				  &modelerrs, &poorvals, prn);
973 
974 	    run_gretl_mp_comparison (dataset, certvals, polyterms,
975 				     zdigits, &mpfails, prn);
976 
977 	    free_mp_results(certvals);
978 	    certvals = NULL;
979 	    destroy_dataset(dataset);
980 	    dataset = NULL;
981 	    free(zdigits);
982 	    zdigits = NULL;
983 	}
984     }
985 
986     print_nist_summary(ntests, missing, modelerrs, poorvals, mpfails,
987 		       NULL, prn);
988 
989     gretl_pop_c_numeric_locale();
990 
991     gretl_print_destroy(prn);
992 
993     return (missing || modelerrs || poorvals);
994 }
995 
996 #endif /* STANDALONE */
997 
998 #ifdef STANDALONE
999 
1000 # ifdef _WIN32
1001 #  include <windows.h>
1002 # else
1003 #  include <dlfcn.h>
1004 # endif
1005 
get_mplsq(void)1006 static void *get_mplsq (void)
1007 {
1008     void *handle;
1009     void *funp;
1010 
1011 # ifdef _WIN32
1012     handle = LoadLibrary("plugins\\mp_ols.dll");
1013     if (handle == NULL) return NULL;
1014 # else
1015     handle = dlopen("../plugin/.libs/mp_ols.so", RTLD_LAZY);
1016     if (handle == NULL) {
1017 	fputs(dlerror(), stderr);
1018 	return NULL;
1019     }
1020 # endif /* _WIN32 */
1021 
1022 # ifdef _WIN32
1023     funp = GetProcAddress(handle, "mplsq");
1024 # else
1025     funp = dlsym(handle, "mplsq");
1026     if (funp == NULL) {
1027 	/* try munged version */
1028 	funp = dlsym(handle, "_mplsq");
1029 	if (funp == NULL) {
1030 	    fputs(dlerror(), stderr);
1031 	}
1032     }
1033 # endif /* _WIN32 */
1034 
1035     if (funp == NULL) {
1036 	close_plugin(handle);
1037     }
1038 
1039     return funp;
1040 }
1041 
1042 #endif /* STANDALONE */
1043 
1044 
1045