1 #include <math.h>
2 #include "bench.h"
3
4 #define BOOTSTRAP_COUNT 200
5
6 /*
7 * a comparison function used by qsort
8 */
9 int
int_compare(const void * a,const void * b)10 int_compare(const void *a, const void *b)
11 {
12 if (*(int*)a < *(int*)b) return -1;
13 if (*(int*)a > *(int*)b) return 1;
14 return 0;
15 }
16
17 /*
18 * a comparison function used by qsort
19 */
20 int
uint64_compare(const void * a,const void * b)21 uint64_compare(const void *a, const void *b)
22 {
23 if (*(uint64*)a < *(uint64*)b) return -1;
24 if (*(uint64*)a > *(uint64*)b) return 1;
25 return 0;
26 }
27
28 /*
29 * a comparison function used by qsort
30 */
31 int
double_compare(const void * a,const void * b)32 double_compare(const void *a, const void *b)
33 {
34 if (*(double*)a < *(double*)b) return -1;
35 if (*(double*)a > *(double*)b) return 1;
36 return 0;
37 }
38
39 /*
40 * return the median value of an array of int
41 */
42 int
int_median(int * values,int size)43 int_median(int *values, int size)
44 {
45 qsort(values, size, sizeof(int), int_compare);
46
47 if (size == 0) return 0.;
48
49 if (size % 2) {
50 return values[size/2];
51 }
52
53 return (values[size/2 - 1] + values[size/2]) / 2;
54 }
55
56 /*
57 * return the median value of an array of int
58 */
59 uint64
uint64_median(uint64 * values,int size)60 uint64_median(uint64 *values, int size)
61 {
62 qsort(values, size, sizeof(uint64), uint64_compare);
63
64 if (size == 0) return 0.;
65
66 if (size % 2) {
67 return values[size/2];
68 }
69
70 return (values[size/2 - 1] + values[size/2]) / 2;
71 }
72
73 /*
74 * return the median value of an array of doubles
75 */
76 double
double_median(double * values,int size)77 double_median(double *values, int size)
78 {
79 qsort(values, size, sizeof(double), double_compare);
80
81 if (size == 0) return 0.;
82
83 if (size % 2) {
84 return values[size/2];
85 }
86
87 return (values[size/2 - 1] + values[size/2]) / 2.0;
88 }
89
90 /*
91 * return the mean value of an array of int
92 */
93 int
int_mean(int * values,int size)94 int_mean(int *values, int size)
95 {
96 int i;
97 int sum = 0;
98
99 for (i = 0; i < size; ++i)
100 sum += values[i];
101
102 return sum / size;
103 }
104
105 /*
106 * return the mean value of an array of int
107 */
108 uint64
uint64_mean(uint64 * values,int size)109 uint64_mean(uint64 *values, int size)
110 {
111 int i;
112 uint64 sum = 0;
113
114 for (i = 0; i < size; ++i)
115 sum += values[i];
116
117 return sum / size;
118 }
119
120 /*
121 * return the mean value of an array of doubles
122 */
123 double
double_mean(double * values,int size)124 double_mean(double *values, int size)
125 {
126 int i;
127 double sum = 0.0;
128
129 for (i = 0; i < size; ++i)
130 sum += values[i];
131
132 return sum / (double)size;
133 }
134
135 /*
136 * return the min value of an array of int
137 */
138 int
int_min(int * values,int size)139 int_min(int *values, int size)
140 {
141 int i;
142 int min = values[0];
143
144 for (i = 1; i < size; ++i)
145 if (values[i] < min) min = values[i];
146
147 return min;
148 }
149
150 /*
151 * return the min value of an array of int
152 */
153 uint64
uint64_min(uint64 * values,int size)154 uint64_min(uint64 *values, int size)
155 {
156 int i;
157 uint64 min = values[0];
158
159 for (i = 1; i < size; ++i)
160 if (values[i] < min) min = values[i];
161
162 return min;
163 }
164
165 /*
166 * return the min value of an array of doubles
167 */
168 double
double_min(double * values,int size)169 double_min(double *values, int size)
170 {
171 int i;
172 double min = values[0];
173
174 for (i = 1; i < size; ++i)
175 if (values[i] < min) min = values[i];
176
177 return min;
178 }
179
180 /*
181 * return the max value of an array of int
182 */
183 int
int_max(int * values,int size)184 int_max(int *values, int size)
185 {
186 int i;
187 int max = values[0];
188
189 for (i = 1; i < size; ++i)
190 if (values[i] > max) max = values[i];
191
192 return max;
193 }
194
195 /*
196 * return the max value of an array of int
197 */
198 uint64
uint64_max(uint64 * values,int size)199 uint64_max(uint64 *values, int size)
200 {
201 int i;
202 uint64 max = values[0];
203
204 for (i = 1; i < size; ++i)
205 if (values[i] > max) max = values[i];
206
207 return max;
208 }
209
210 /*
211 * return the max value of an array of doubles
212 */
213 double
double_max(double * values,int size)214 double_max(double *values, int size)
215 {
216 int i;
217 double max = values[0];
218
219 for (i = 1; i < size; ++i)
220 if (values[i] > max) max = values[i];
221
222 return max;
223 }
224
225 /*
226 * return the variance of an array of ints
227 *
228 * Reference: "Statistics for Experimenters" by
229 * George E.P. Box et. al., page 41
230 */
int_variance(int * values,int size)231 double int_variance(int *values, int size)
232 {
233 int i;
234 double sum = 0.0;
235 int mean = int_mean(values, size);
236
237 for (i = 0; i < size; ++i)
238 sum += (double)((values[i] - mean) * (values[i] - mean));
239
240 return sum / (double)(size - 1);
241 }
242
243 /*
244 * return the variance of an array of uint64s
245 */
uint64_variance(uint64 * values,int size)246 double uint64_variance(uint64 *values, int size)
247 {
248 int i;
249 double sum = 0.0;
250 uint64 mean = uint64_mean(values, size);
251
252 for (i = 0; i < size; ++i)
253 sum += (double)((values[i] - mean) * (values[i] - mean));
254 return sum / (double)(size - 1);
255 }
256
257 /*
258 * return the variance of an array of doubles
259 */
double_variance(double * values,int size)260 double double_variance(double *values, int size)
261 {
262 int i;
263 double sum = 0.0;
264 double mean = double_mean(values, size);
265
266 for (i = 0; i < size; ++i)
267 sum += (double)((values[i] - mean) * (values[i] - mean));
268
269 return sum / (double)(size - 1);
270 }
271
272 /*
273 * return the moment of an array of ints
274 *
275 * Reference: "Statistics for Experimenters" by
276 * George E.P. Box et. al., page 41, 90
277 */
int_moment(int moment,int * values,int size)278 double int_moment(int moment, int *values, int size)
279 {
280 int i, j;
281 double sum = 0.0;
282 int mean = int_mean(values, size);
283
284 for (i = 0; i < size; ++i) {
285 double diff = values[i] - mean;
286 double m = diff;
287 for (j = 1; j < moment; ++j)
288 m *= diff;
289 sum += m;
290 }
291
292 return sum / (double)size;
293 }
294
295 /*
296 * return the moment of an array of uint64s
297 */
uint64_moment(int moment,uint64 * values,int size)298 double uint64_moment(int moment, uint64 *values, int size)
299 {
300 int i, j;
301 double sum = 0.0;
302 uint64 mean = uint64_mean(values, size);
303
304 for (i = 0; i < size; ++i) {
305 double diff = values[i] - mean;
306 double m = diff;
307 for (j = 1; j < moment; ++j)
308 m *= diff;
309 sum += m;
310 }
311
312 return sum / (double)size;
313 }
314
315 /*
316 * return the moment of an array of doubles
317 */
double_moment(int moment,double * values,int size)318 double double_moment(int moment, double *values, int size)
319 {
320 int i, j;
321 double sum = 0.0;
322 double mean = double_mean(values, size);
323
324 for (i = 0; i < size; ++i) {
325 double diff = values[i] - mean;
326 double m = diff;
327 for (j = 1; j < moment; ++j)
328 m *= diff;
329 sum += m;
330 }
331
332 return sum / (double)size;
333 }
334
335 /*
336 * return the standard error of an array of ints
337 *
338 * Reference: "Statistics for Experimenters" by
339 * George E.P. Box et. al., page 41
340 */
int_stderr(int * values,int size)341 double int_stderr(int *values, int size)
342 {
343 return sqrt(int_variance(values, size));
344 }
345
346 /*
347 * return the standard error of an array of uint64s
348 */
uint64_stderr(uint64 * values,int size)349 double uint64_stderr(uint64 *values, int size)
350 {
351 return sqrt(uint64_variance(values, size));
352 }
353
354 /*
355 * return the standard error of an array of doubles
356 */
double_stderr(double * values,int size)357 double double_stderr(double *values, int size)
358 {
359 return sqrt(double_variance(values, size));
360 }
361
362 /*
363 * return the skew of an array of ints
364 *
365 */
int_skew(int * values,int size)366 double int_skew(int *values, int size)
367 {
368 double sigma = int_stderr(values, size);
369 double moment3 = int_moment(3, values, size);
370
371 return moment3 / (sigma * sigma * sigma);
372 }
373
374 /*
375 * return the skew of an array of uint64s
376 */
uint64_skew(uint64 * values,int size)377 double uint64_skew(uint64 *values, int size)
378 {
379 double sigma = uint64_stderr(values, size);
380 double moment3 = uint64_moment(3, values, size);
381
382 return moment3 / (sigma * sigma * sigma);
383 }
384
385 /*
386 * return the skew of an array of doubles
387 */
double_skew(double * values,int size)388 double double_skew(double *values, int size)
389 {
390 double sigma = double_stderr(values, size);
391 double moment3 = double_moment(3, values, size);
392
393 return moment3 / (sigma * sigma * sigma);
394 }
395
396 /*
397 * return the kurtosis of an array of ints
398 *
399 * Reference: "Statistics for Experimenters" by
400 * George E.P. Box et. al., page 90;
401 */
int_kurtosis(int * values,int size)402 double int_kurtosis(int *values, int size)
403 {
404 double variance = int_variance(values, size);
405 double moment4 = int_moment(4, values, size);
406
407 return moment4 / (variance * variance) - 3;
408 }
409
410 /*
411 * return the kurtosis of an array of uint64s
412 */
uint64_kurtosis(uint64 * values,int size)413 double uint64_kurtosis(uint64 *values, int size)
414 {
415 double variance = uint64_variance(values, size);
416 double moment4 = uint64_moment(4, values, size);
417
418 return moment4 / (variance * variance) - 3;
419 }
420
421 /*
422 * return the kurtosis of an array of doubles
423 */
double_kurtosis(double * values,int size)424 double double_kurtosis(double *values, int size)
425 {
426 double variance = double_variance(values, size);
427 double moment4 = double_moment(4, values, size);
428
429 return moment4 / (variance * variance) - 3;
430 }
431
432 /*
433 * BOOTSTRAP:
434 *
435 * stderr = sqrt(sum_i(s[i] - sum_j(s[j])/B)**2 / (B - 1))
436 *
437 * Reference: "An Introduction to the Bootstrap" by Bradley
438 * Efron and Robert J. Tibshirani, page 12.
439 */
440
441 /*
442 * return the bootstrap estimation of the standard error
443 * of an array of ints
444 */
int_bootstrap_stderr(int * values,int size,int_stat f)445 double int_bootstrap_stderr(int *values, int size, int_stat f)
446 {
447 int i, j;
448 int *samples = (int*)malloc(size * sizeof(int));
449 double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
450 double s_sum = 0;
451 double sum = 0;
452
453 /* generate the stderr for each of the bootstrap samples */
454 for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
455 for (j = 0; j < size; ++j)
456 samples[j] = values[rand() % size];
457 s[i] = (double)(*f)(samples, size);
458 s_sum += s[i]; /* CHS: worry about overflow */
459 }
460 s_sum /= (double)BOOTSTRAP_COUNT;
461
462 for (i = 0; i < BOOTSTRAP_COUNT; ++i)
463 sum += (s[i] - s_sum) * (s[i] - s_sum);
464
465 sum /= (double)(BOOTSTRAP_COUNT - 1);
466
467 free(samples);
468 free(s);
469
470 return sqrt(sum);
471 }
472
473 /*
474 * return the bootstrap estimation of the standard error
475 * of an array of uint64s
476 */
uint64_bootstrap_stderr(uint64 * values,int size,uint64_stat f)477 double uint64_bootstrap_stderr(uint64 *values, int size, uint64_stat f)
478 {
479 int i, j;
480 uint64 *samples = (uint64*)malloc(size * sizeof(uint64));
481 double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
482 double s_sum;
483 double sum;
484
485 /* generate the stderr for each of the bootstrap samples */
486 for (i = 0, s_sum = 0.0; i < BOOTSTRAP_COUNT; ++i) {
487 for (j = 0; j < size; ++j)
488 samples[j] = values[rand() % size];
489 s[i] = (double)(*f)(samples, size);
490 s_sum += s[i]; /* CHS: worry about overflow */
491 }
492 s_sum /= (double)BOOTSTRAP_COUNT;
493
494 for (i = 0, sum = 0.0; i < BOOTSTRAP_COUNT; ++i)
495 sum += (s[i] - s_sum) * (s[i] - s_sum);
496
497 free(samples);
498 free(s);
499
500 return sqrt(sum/(double)(BOOTSTRAP_COUNT - 1));
501 }
502
503 /*
504 * return the bootstrap estimation of the standard error
505 * of an array of doubles
506 */
double_bootstrap_stderr(double * values,int size,double_stat f)507 double double_bootstrap_stderr(double *values, int size, double_stat f)
508 {
509 int i, j;
510 double *samples = (double*)malloc(size * sizeof(double));
511 double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
512 double s_sum = 0;
513 double sum = 0;
514
515 /* generate the stderr for each of the bootstrap samples */
516 for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
517 for (j = 0; j < size; ++j)
518 samples[j] = values[rand() % size];
519 s[i] = (*f)(samples, size);
520 s_sum += (double)s[i]; /* CHS: worry about overflow */
521 }
522 s_sum /= (double)BOOTSTRAP_COUNT;
523
524 for (i = 0; i < BOOTSTRAP_COUNT; ++i)
525 sum += (s[i] - s_sum) * (s[i] - s_sum);
526
527 sum /= (double)(BOOTSTRAP_COUNT - 1);
528
529 free(samples);
530 free(s);
531
532 return sqrt(sum);
533 }
534
535 /*
536 * regression(x, y, sig, n, a, b, sig_a, sig_b, chi2)
537 *
538 * This routine is derived from equations in "Numerical Recipes in C"
539 * (second edition) by Press, et. al., pages 661-665.
540 *
541 * compute the linear regression y = a + bx for (x,y), where y[i] has
542 * standard deviation sig[i].
543 *
544 * returns the coefficients a and b, along with an estimation of their
545 * error (standard deviation) in sig_a and sig_b.
546 *
547 * returns chi2 for "goodness of fit" information.
548 */
549
550 void
regression(double * x,double * y,double * sig,int n,double * a,double * b,double * sig_a,double * sig_b,double * chi2)551 regression(double *x, double *y, double *sig, int n,
552 double *a, double *b, double *sig_a, double *sig_b,
553 double *chi2)
554 {
555 int i;
556 double S = 0.0, Sx = 0.0, Sy = 0.0, Stt = 0.0, Sx_S;
557
558 /* compute some basic statistics */
559 for (i = 0; i < n; ++i) {
560 /* Equations 15.2.4: for S, Sx, Sy */
561 double weight = 1.0 / (sig ? sig[i] * sig[i] : 1.0);
562 S += weight;
563 Sx += weight * x[i];
564 Sy += weight * y[i];
565 }
566
567 *b = 0.0;
568 Sx_S = Sx / S;
569 for (i = 0; i < n; ++i) {
570 /*
571 * Equation 15.2.15 for t
572 * Equation 15.2.16 for Stt
573 * Equation 15.2.17 for b, do summation portion of equation
574 * compute Sum i=0,n-1 (t_i * y[i] / sig[i]))
575 */
576 double t_i = (x[i] - Sx_S) / (sig ? sig[i] : 1.0);
577 Stt += t_i * t_i;
578 *b += t_i * y[i] / (sig ? sig[i] : 1.0);
579 }
580
581 /*
582 * Equation 15.2.17 for b, do 1/Stt * summation
583 * Equation 15.2.18 for a
584 * Equation 15.2.19 for sig_a
585 * Equation 15.2.20 for sig_b
586 */
587 *b /= Stt;
588 *a = (Sy - *b * Sx) / S;
589 *sig_a = sqrt((1.0 + (Sx * Sx) / (S * Stt)) / S);
590 *sig_b = sqrt(1.0 / Stt);
591
592 /* Equation 15.2.2 for chi2, the merit function */
593 *chi2 = 0.0;
594 for (i = 0; i < n; ++i) {
595 double merit = (y[i] - ((*a) + (*b) * x[i])) / (sig ? sig[i] : 1.0);
596 *chi2 += merit * merit;
597 }
598 if (sig == NULL) {
599 *sig_a *= sqrt((*chi2) / (n - 2));
600 *sig_b *= sqrt((*chi2) / (n - 2));
601 }
602 }
603
604