1 #include <math.h>
2 #include "bench.h"
3 
4 #define BOOTSTRAP_COUNT	200
5 
6 /*
7  * a comparison function used by qsort
8  */
9 int
int_compare(const void * a,const void * b)10 int_compare(const void *a, const void *b)
11 {
12 	if (*(int*)a < *(int*)b) return -1;
13 	if (*(int*)a > *(int*)b) return 1;
14 	return 0;
15 }
16 
17 /*
18  * a comparison function used by qsort
19  */
20 int
uint64_compare(const void * a,const void * b)21 uint64_compare(const void *a, const void *b)
22 {
23 	if (*(uint64*)a < *(uint64*)b) return -1;
24 	if (*(uint64*)a > *(uint64*)b) return  1;
25 	return 0;
26 }
27 
28 /*
29  * a comparison function used by qsort
30  */
31 int
double_compare(const void * a,const void * b)32 double_compare(const void *a, const void *b)
33 {
34 	if (*(double*)a < *(double*)b) return -1;
35 	if (*(double*)a > *(double*)b) return 1;
36 	return 0;
37 }
38 
39 /*
40  * return the median value of an array of int
41  */
42 int
int_median(int * values,int size)43 int_median(int *values, int size)
44 {
45 	qsort(values, size, sizeof(int), int_compare);
46 
47 	if (size == 0) return 0.;
48 
49 	if (size % 2) {
50 	    return values[size/2];
51 	}
52 
53 	return (values[size/2 - 1] + values[size/2]) / 2;
54 }
55 
56 /*
57  * return the median value of an array of int
58  */
59 uint64
uint64_median(uint64 * values,int size)60 uint64_median(uint64 *values, int size)
61 {
62 	qsort(values, size, sizeof(uint64), uint64_compare);
63 
64 	if (size == 0) return 0.;
65 
66 	if (size % 2) {
67 	    return values[size/2];
68 	}
69 
70 	return (values[size/2 - 1] + values[size/2]) / 2;
71 }
72 
73 /*
74  * return the median value of an array of doubles
75  */
76 double
double_median(double * values,int size)77 double_median(double *values, int size)
78 {
79 	qsort(values, size, sizeof(double), double_compare);
80 
81 	if (size == 0) return 0.;
82 
83 	if (size % 2) {
84 	    return values[size/2];
85 	}
86 
87 	return (values[size/2 - 1] + values[size/2]) / 2.0;
88 }
89 
90 /*
91  * return the mean value of an array of int
92  */
93 int
int_mean(int * values,int size)94 int_mean(int *values, int size)
95 {
96 	int	i;
97 	int	sum = 0;
98 
99 	for (i = 0; i < size; ++i)
100 		sum += values[i];
101 
102 	return sum / size;
103 }
104 
105 /*
106  * return the mean value of an array of int
107  */
108 uint64
uint64_mean(uint64 * values,int size)109 uint64_mean(uint64 *values, int size)
110 {
111 	int	i;
112 	uint64	sum = 0;
113 
114 	for (i = 0; i < size; ++i)
115 		sum += values[i];
116 
117 	return sum / size;
118 }
119 
120 /*
121  * return the mean value of an array of doubles
122  */
123 double
double_mean(double * values,int size)124 double_mean(double *values, int size)
125 {
126 	int	i;
127 	double	sum = 0.0;
128 
129 	for (i = 0; i < size; ++i)
130 		sum += values[i];
131 
132 	return sum / (double)size;
133 }
134 
135 /*
136  * return the min value of an array of int
137  */
138 int
int_min(int * values,int size)139 int_min(int *values, int size)
140 {
141 	int	i;
142 	int	min = values[0];
143 
144 	for (i = 1; i < size; ++i)
145 		if (values[i] < min) min = values[i];
146 
147 	return min;
148 }
149 
150 /*
151  * return the min value of an array of int
152  */
153 uint64
uint64_min(uint64 * values,int size)154 uint64_min(uint64 *values, int size)
155 {
156 	int	i;
157 	uint64	min = values[0];
158 
159 	for (i = 1; i < size; ++i)
160 		if (values[i] < min) min = values[i];
161 
162 	return min;
163 }
164 
165 /*
166  * return the min value of an array of doubles
167  */
168 double
double_min(double * values,int size)169 double_min(double *values, int size)
170 {
171 	int	i;
172 	double	min = values[0];
173 
174 	for (i = 1; i < size; ++i)
175 		if (values[i] < min) min = values[i];
176 
177 	return min;
178 }
179 
180 /*
181  * return the max value of an array of int
182  */
183 int
int_max(int * values,int size)184 int_max(int *values, int size)
185 {
186 	int	i;
187 	int	max = values[0];
188 
189 	for (i = 1; i < size; ++i)
190 		if (values[i] > max) max = values[i];
191 
192 	return max;
193 }
194 
195 /*
196  * return the max value of an array of int
197  */
198 uint64
uint64_max(uint64 * values,int size)199 uint64_max(uint64 *values, int size)
200 {
201 	int	i;
202 	uint64	max = values[0];
203 
204 	for (i = 1; i < size; ++i)
205 		if (values[i] > max) max = values[i];
206 
207 	return max;
208 }
209 
210 /*
211  * return the max value of an array of doubles
212  */
213 double
double_max(double * values,int size)214 double_max(double *values, int size)
215 {
216 	int	i;
217 	double	max = values[0];
218 
219 	for (i = 1; i < size; ++i)
220 		if (values[i] > max) max = values[i];
221 
222 	return max;
223 }
224 
225 /*
226  * return the variance of an array of ints
227  *
228  * Reference: "Statistics for Experimenters" by
229  * 	George E.P. Box et. al., page 41
230  */
int_variance(int * values,int size)231 double	int_variance(int *values, int size)
232 {
233 	int	i;
234 	double	sum = 0.0;
235 	int	mean = int_mean(values, size);
236 
237 	for (i = 0; i < size; ++i)
238 		sum += (double)((values[i] - mean) * (values[i] - mean));
239 
240 	return sum / (double)(size - 1);
241 }
242 
243 /*
244  * return the variance of an array of uint64s
245  */
uint64_variance(uint64 * values,int size)246 double	uint64_variance(uint64 *values, int size)
247 {
248 	int	i;
249 	double	sum = 0.0;
250 	uint64	mean = uint64_mean(values, size);
251 
252 	for (i = 0; i < size; ++i)
253 		sum += (double)((values[i] - mean) * (values[i] - mean));
254 	return sum / (double)(size - 1);
255 }
256 
257 /*
258  * return the variance of an array of doubles
259  */
double_variance(double * values,int size)260 double	double_variance(double *values, int size)
261 {
262 	int	i;
263 	double	sum = 0.0;
264 	double	mean = double_mean(values, size);
265 
266 	for (i = 0; i < size; ++i)
267 		sum += (double)((values[i] - mean) * (values[i] - mean));
268 
269 	return sum / (double)(size - 1);
270 }
271 
272 /*
273  * return the moment of an array of ints
274  *
275  * Reference: "Statistics for Experimenters" by
276  * 	George E.P. Box et. al., page 41, 90
277  */
int_moment(int moment,int * values,int size)278 double	int_moment(int moment, int *values, int size)
279 {
280 	int	i, j;
281 	double	sum = 0.0;
282 	int	mean = int_mean(values, size);
283 
284 	for (i = 0; i < size; ++i) {
285 		double diff = values[i] - mean;
286 		double m = diff;
287 		for (j = 1; j < moment; ++j)
288 			m *= diff;
289 		sum += m;
290 	}
291 
292 	return sum / (double)size;
293 }
294 
295 /*
296  * return the moment of an array of uint64s
297  */
uint64_moment(int moment,uint64 * values,int size)298 double	uint64_moment(int moment, uint64 *values, int size)
299 {
300 	int	i, j;
301 	double	sum = 0.0;
302 	uint64	mean = uint64_mean(values, size);
303 
304 	for (i = 0; i < size; ++i) {
305 		double diff = values[i] - mean;
306 		double m = diff;
307 		for (j = 1; j < moment; ++j)
308 			m *= diff;
309 		sum += m;
310 	}
311 
312 	return sum / (double)size;
313 }
314 
315 /*
316  * return the moment of an array of doubles
317  */
double_moment(int moment,double * values,int size)318 double	double_moment(int moment, double *values, int size)
319 {
320 	int	i, j;
321 	double	sum = 0.0;
322 	double	mean = double_mean(values, size);
323 
324 	for (i = 0; i < size; ++i) {
325 		double diff = values[i] - mean;
326 		double m = diff;
327 		for (j = 1; j < moment; ++j)
328 			m *= diff;
329 		sum += m;
330 	}
331 
332 	return sum / (double)size;
333 }
334 
335 /*
336  * return the standard error of an array of ints
337  *
338  * Reference: "Statistics for Experimenters" by
339  * 	George E.P. Box et. al., page 41
340  */
int_stderr(int * values,int size)341 double	int_stderr(int *values, int size)
342 {
343 	return sqrt(int_variance(values, size));
344 }
345 
346 /*
347  * return the standard error of an array of uint64s
348  */
uint64_stderr(uint64 * values,int size)349 double	uint64_stderr(uint64 *values, int size)
350 {
351 	return sqrt(uint64_variance(values, size));
352 }
353 
354 /*
355  * return the standard error of an array of doubles
356  */
double_stderr(double * values,int size)357 double	double_stderr(double *values, int size)
358 {
359 	return sqrt(double_variance(values, size));
360 }
361 
362 /*
363  * return the skew of an array of ints
364  *
365  */
int_skew(int * values,int size)366 double	int_skew(int *values, int size)
367 {
368 	double	sigma = int_stderr(values, size);
369 	double	moment3 = int_moment(3, values, size);
370 
371 	return moment3 / (sigma * sigma * sigma);
372 }
373 
374 /*
375  * return the skew of an array of uint64s
376  */
uint64_skew(uint64 * values,int size)377 double	uint64_skew(uint64 *values, int size)
378 {
379 	double	sigma = uint64_stderr(values, size);
380 	double	moment3 = uint64_moment(3, values, size);
381 
382 	return moment3 / (sigma * sigma * sigma);
383 }
384 
385 /*
386  * return the skew of an array of doubles
387  */
double_skew(double * values,int size)388 double	double_skew(double *values, int size)
389 {
390 	double	sigma = double_stderr(values, size);
391 	double	moment3 = double_moment(3, values, size);
392 
393 	return moment3 / (sigma * sigma * sigma);
394 }
395 
396 /*
397  * return the kurtosis of an array of ints
398  *
399  * Reference: "Statistics for Experimenters" by
400  * 	George E.P. Box et. al., page 90;
401  */
int_kurtosis(int * values,int size)402 double	int_kurtosis(int *values, int size)
403 {
404 	double	variance = int_variance(values, size);
405 	double	moment4 = int_moment(4, values, size);
406 
407 	return moment4 / (variance * variance) - 3;
408 }
409 
410 /*
411  * return the kurtosis of an array of uint64s
412  */
uint64_kurtosis(uint64 * values,int size)413 double	uint64_kurtosis(uint64 *values, int size)
414 {
415 	double	variance = uint64_variance(values, size);
416 	double	moment4 = uint64_moment(4, values, size);
417 
418 	return moment4 / (variance * variance) - 3;
419 }
420 
421 /*
422  * return the kurtosis of an array of doubles
423  */
double_kurtosis(double * values,int size)424 double	double_kurtosis(double *values, int size)
425 {
426 	double	variance = double_variance(values, size);
427 	double	moment4 = double_moment(4, values, size);
428 
429 	return moment4 / (variance * variance) - 3;
430 }
431 
432 /*
433  * BOOTSTRAP:
434  *
435  * stderr = sqrt(sum_i(s[i] - sum_j(s[j])/B)**2 / (B - 1))
436  *
437  * Reference: "An Introduction to the Bootstrap" by Bradley
438  *	Efron and Robert J. Tibshirani, page 12.
439  */
440 
441 /*
442  * return the bootstrap estimation of the standard error
443  * of an array of ints
444  */
int_bootstrap_stderr(int * values,int size,int_stat f)445 double	int_bootstrap_stderr(int *values, int size, int_stat f)
446 {
447 	int	i, j;
448 	int    *samples = (int*)malloc(size * sizeof(int));
449 	double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
450 	double	s_sum = 0;
451 	double	sum = 0;
452 
453 	/* generate the stderr for each of the bootstrap samples */
454 	for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
455 		for (j = 0; j < size; ++j)
456 			samples[j] = values[rand() % size];
457 		s[i] = (double)(*f)(samples, size);
458 		s_sum += s[i];	/* CHS: worry about overflow */
459 	}
460 	s_sum /= (double)BOOTSTRAP_COUNT;
461 
462 	for (i = 0; i < BOOTSTRAP_COUNT; ++i)
463 		sum += (s[i] - s_sum) * (s[i] - s_sum);
464 
465 	sum /= (double)(BOOTSTRAP_COUNT - 1);
466 
467 	free(samples);
468 	free(s);
469 
470 	return sqrt(sum);
471 }
472 
473 /*
474  * return the bootstrap estimation of the standard error
475  * of an array of uint64s
476  */
uint64_bootstrap_stderr(uint64 * values,int size,uint64_stat f)477 double	uint64_bootstrap_stderr(uint64 *values, int size, uint64_stat f)
478 {
479 	int	i, j;
480 	uint64 *samples = (uint64*)malloc(size * sizeof(uint64));
481 	double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
482 	double	s_sum;
483 	double	sum;
484 
485 	/* generate the stderr for each of the bootstrap samples */
486 	for (i = 0, s_sum = 0.0; i < BOOTSTRAP_COUNT; ++i) {
487 		for (j = 0; j < size; ++j)
488 			samples[j] = values[rand() % size];
489 		s[i] = (double)(*f)(samples, size);
490 		s_sum += s[i];	/* CHS: worry about overflow */
491 	}
492 	s_sum /= (double)BOOTSTRAP_COUNT;
493 
494 	for (i = 0, sum = 0.0; i < BOOTSTRAP_COUNT; ++i)
495 		sum += (s[i] - s_sum) * (s[i] - s_sum);
496 
497 	free(samples);
498 	free(s);
499 
500 	return sqrt(sum/(double)(BOOTSTRAP_COUNT - 1));
501 }
502 
503 /*
504  * return the bootstrap estimation of the standard error
505  * of an array of doubles
506  */
double_bootstrap_stderr(double * values,int size,double_stat f)507 double	double_bootstrap_stderr(double *values, int size, double_stat f)
508 {
509 	int	i, j;
510 	double *samples = (double*)malloc(size * sizeof(double));
511 	double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
512 	double	s_sum = 0;
513 	double	sum = 0;
514 
515 	/* generate the stderr for each of the bootstrap samples */
516 	for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
517 		for (j = 0; j < size; ++j)
518 			samples[j] = values[rand() % size];
519 		s[i] = (*f)(samples, size);
520 		s_sum += (double)s[i];	/* CHS: worry about overflow */
521 	}
522 	s_sum /= (double)BOOTSTRAP_COUNT;
523 
524 	for (i = 0; i < BOOTSTRAP_COUNT; ++i)
525 		sum += (s[i] - s_sum) * (s[i] - s_sum);
526 
527 	sum /= (double)(BOOTSTRAP_COUNT - 1);
528 
529 	free(samples);
530 	free(s);
531 
532 	return sqrt(sum);
533 }
534 
535 /*
536  * regression(x, y, sig, n, a, b, sig_a, sig_b, chi2)
537  *
538  * This routine is derived from equations in "Numerical Recipes in C"
539  * (second edition) by Press, et. al.,  pages 661-665.
540  *
541  * compute the linear regression y = a + bx for (x,y), where y[i] has
542  * standard deviation sig[i].
543  *
544  * returns the coefficients a and b, along with an estimation of their
545  * error (standard deviation) in sig_a and sig_b.
546  *
547  * returns chi2 for "goodness of fit" information.
548  */
549 
550 void
regression(double * x,double * y,double * sig,int n,double * a,double * b,double * sig_a,double * sig_b,double * chi2)551 regression(double *x, double *y, double *sig, int n,
552 	   double *a, double *b, double *sig_a, double *sig_b,
553 	   double *chi2)
554 {
555 	int	i;
556 	double	S = 0.0, Sx = 0.0, Sy = 0.0, Stt = 0.0, Sx_S;
557 
558 	/* compute some basic statistics */
559 	for (i = 0; i < n; ++i) {
560 		/* Equations 15.2.4: for S, Sx, Sy */
561 		double	weight = 1.0 / (sig ? sig[i] * sig[i] : 1.0);
562 		S += weight;
563 		Sx += weight * x[i];
564 		Sy += weight * y[i];
565 	}
566 
567 	*b = 0.0;
568 	Sx_S = Sx / S;
569 	for (i = 0; i < n; ++i) {
570 		/*
571 		 * Equation 15.2.15 for t
572 		 * Equation 15.2.16 for Stt
573 		 * Equation 15.2.17 for b, do summation portion of equation
574 		 *	compute Sum i=0,n-1 (t_i * y[i] / sig[i]))
575 		 */
576 		double t_i = (x[i] - Sx_S) / (sig ? sig[i] : 1.0);
577 		Stt += t_i * t_i;
578 		*b  += t_i * y[i] / (sig ? sig[i] : 1.0);
579 	}
580 
581 	/*
582 	 * Equation 15.2.17 for b, do 1/Stt * summation
583 	 * Equation 15.2.18 for a
584 	 * Equation 15.2.19 for sig_a
585 	 * Equation 15.2.20 for sig_b
586 	 */
587 	*b /= Stt;
588 	*a = (Sy - *b * Sx) / S;
589 	*sig_a = sqrt((1.0 + (Sx * Sx) / (S * Stt)) / S);
590 	*sig_b = sqrt(1.0 / Stt);
591 
592 	/* Equation 15.2.2 for chi2, the merit function */
593 	*chi2 = 0.0;
594 	for (i = 0; i < n; ++i) {
595 		double merit = (y[i] - ((*a) + (*b) * x[i])) / (sig ? sig[i] : 1.0);
596 		*chi2 += merit * merit;
597 	}
598 	if (sig == NULL) {
599 	  *sig_a *= sqrt((*chi2) / (n - 2));
600 	  *sig_b *= sqrt((*chi2) / (n - 2));
601 	}
602 }
603 
604