1 #define PERL_NO_GET_CONTEXT /* we want efficiency */
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 #include "ppport.h"
7
8 #include "mt.h"
9 #include "stats.h"
10 #include <stdlib.h>
11
12
13 typedef struct mt * Statistics__CaseResampling__RdGen;
14
15 void*
U32ArrayPtr(pTHX_ int n)16 U32ArrayPtr (pTHX_ int n)
17 {
18 SV * sv = sv_2mortal( NEWSV( 0, n*sizeof(U32) ) );
19 return SvPVX(sv);
20 }
21
22 double
cs_mean_av(pTHX_ AV * sample)23 cs_mean_av(pTHX_ AV* sample)
24 {
25 I32 i, n;
26 SV** elem;
27 double sum;
28 n = av_len(sample)+1;
29 sum = 0.;
30 for (i = 0; i < n; ++i) {
31 if (NULL == (elem = av_fetch(sample, i, 0))) {
32 croak("Could not fetch element from array");
33 }
34 else
35 sum += SvNV(*elem);
36 }
37 return sum/(double)n;
38 }
39
40 double
cs_sum_deviation_squared_av(pTHX_ double mean,AV * sample)41 cs_sum_deviation_squared_av(pTHX_ double mean, AV* sample)
42 {
43 I32 i, n;
44 SV** elem;
45 double sum;
46 n = av_len(sample)+1;
47 sum = 0.;
48 for (i = 0; i < n; ++i) {
49 if (NULL == (elem = av_fetch(sample, i, 0))) {
50 croak("Could not fetch element from array");
51 }
52 else
53 sum += pow(SvNV(*elem)-mean, 2);
54 }
55 return sum;
56 }
57
58 void
avToCAry(pTHX_ AV * in,double ** out,I32 * n)59 avToCAry(pTHX_ AV* in, double** out, I32* n)
60 {
61 I32 thisN;
62 double* ary;
63 SV** elem;
64 I32 i;
65 thisN = av_len(in)+1;
66 *n = thisN;
67 if (thisN == 0)
68 return;
69
70 Newx(ary, thisN, double);
71 *out = ary;
72 for (i = 0; i < thisN; ++i) {
73 if (NULL == (elem = av_fetch(in, i, 0))) {
74 Safefree(ary);
75 croak("Could not fetch element from array");
76 }
77 else
78 ary[i] = SvNV(*elem);
79 }
80 }
81
82 void
cAryToAV(pTHX_ double * in,AV ** out,I32 n)83 cAryToAV(pTHX_ double* in, AV** out, I32 n)
84 {
85 SV* elem;
86 I32 i;
87 *out = newAV();
88 if (n == 0)
89 return;
90
91 av_extend(*out, n-1);
92
93 for (i = 0; i < n; ++i) {
94 elem = newSVnv(in[i]);
95 if (NULL == av_store(*out, i, elem))
96 SvREFCNT_dec(elem);
97 }
98 }
99
100 struct mt*
get_rnd(pTHX)101 get_rnd(pTHX)
102 {
103 IV tmp;
104 SV* therndsv = get_sv("Statistics::CaseResampling::Rnd", 0);
105 if (therndsv == NULL
106 || !SvROK(therndsv)
107 || !sv_derived_from(therndsv, "Statistics::CaseResampling::RdGen"))
108 {
109 croak("Random number generator not set up!");
110 }
111 tmp = SvIV((SV*)SvRV(therndsv));
112 return INT2PTR(struct mt*, tmp);
113 }
114
115
116 MODULE = Statistics::CaseResampling PACKAGE = Statistics::CaseResampling
117 PROTOTYPES: DISABLE
118
119 INCLUDE: RdGen.xs.inc
120
121 MODULE = Statistics::CaseResampling PACKAGE = Statistics::CaseResampling
122 PROTOTYPES: DISABLE
123
124 AV*
125 resample(sample)
126 AV* sample
127 PREINIT:
128 I32 nelem;
129 double* csample;
130 double* destsample;
131 struct mt* rnd;
132 CODE:
133 rnd = get_rnd(aTHX);
134 avToCAry(aTHX_ sample, &csample, &nelem);
135 if (nelem != 0) {
136 Newx(destsample, nelem, double);
137 do_resample(csample, nelem, rnd, destsample);
138 cAryToAV(aTHX_ destsample, &RETVAL, nelem);
139 Safefree(destsample);
140 }
141 else
142 RETVAL = newAV();
143 Safefree(csample);
144 sv_2mortal((SV*)RETVAL);
145 OUTPUT: RETVAL
146
147
148 AV*
149 resample_medians(sample, runs)
150 AV* sample
151 I32 runs
152 PREINIT:
153 I32 nelem;
154 I32 i_run;
155 double* csample;
156 double* destsample;
157 struct mt* rnd;
158 CODE:
159 rnd = get_rnd(aTHX);
160 avToCAry(aTHX_ sample, &csample, &nelem);
161 RETVAL = newAV();
162 if (nelem != 0) {
163 Newx(destsample, nelem, double);
164 av_extend(RETVAL, runs-1);
165 for (i_run = 0; i_run < runs; ++i_run) {
166 do_resample(csample, nelem, rnd, destsample);
167 av_store(RETVAL, i_run, newSVnv(cs_median(destsample, nelem)));
168 }
169 Safefree(destsample);
170 }
171 Safefree(csample);
172 sv_2mortal((SV*)RETVAL);
173 OUTPUT: RETVAL
174
175
176 AV*
177 resample_means(sample, runs)
178 AV* sample
179 I32 runs
180 PREINIT:
181 I32 nelem;
182 I32 i_run;
183 double* csample;
184 double* destsample;
185 struct mt* rnd;
186 CODE:
187 rnd = get_rnd(aTHX);
188 avToCAry(aTHX_ sample, &csample, &nelem);
189 RETVAL = newAV();
190 if (nelem != 0) {
191 Newx(destsample, nelem, double);
192 av_extend(RETVAL, runs-1);
193 for (i_run = 0; i_run < runs; ++i_run) {
194 do_resample(csample, nelem, rnd, destsample);
195 av_store(RETVAL, i_run, newSVnv(cs_mean(destsample, nelem)));
196 }
197 Safefree(destsample);
198 }
199 Safefree(csample);
200 sv_2mortal((SV*)RETVAL);
201 OUTPUT: RETVAL
202
203
204 double
205 median(sample)
206 AV* sample
207 PREINIT:
208 I32 nelem;
209 double* csample;
210 CODE:
211 avToCAry(aTHX_ sample, &csample, &nelem);
212 if (nelem == 0)
213 RETVAL = 0.;
214 else
215 RETVAL = cs_median(csample, nelem);
216 Safefree(csample);
217 OUTPUT: RETVAL
218
219 double
220 median_absolute_deviation(sample)
221 AV* sample
222 PREINIT:
223 I32 nelem;
224 double* csample;
225 CODE:
226 avToCAry(aTHX_ sample, &csample, &nelem);
227 if (nelem == 0)
228 RETVAL = 0.;
229 else {
230 unsigned int i;
231 double* absdev;
232
233 const double median = cs_median(csample, nelem);
234 /* in principle, I think one could write an algorithm
235 * that doesn't require mallocing the second array by inlining an
236 * O(n) median that visits each element only once? */
237 absdev = (double*)malloc(nelem * sizeof(double));
238 for (i = 0; i < nelem; ++i)
239 absdev[i] = fabs(csample[i] - median);
240 RETVAL = cs_median(absdev, nelem);
241 free(absdev);
242 }
243 Safefree(csample);
244 OUTPUT: RETVAL
245
246
247 double
248 first_quartile(sample)
249 AV* sample
250 PREINIT:
251 I32 nelem;
252 double* csample;
253 CODE:
254 avToCAry(aTHX_ sample, &csample, &nelem);
255 if (nelem == 0)
256 RETVAL = 0.;
257 else
258 RETVAL = cs_first_quartile(csample, nelem);
259 Safefree(csample);
260 OUTPUT: RETVAL
261
262
263 double
264 third_quartile(sample)
265 AV* sample
266 PREINIT:
267 I32 nelem;
268 double* csample;
269 CODE:
270 avToCAry(aTHX_ sample, &csample, &nelem);
271 if (nelem == 0)
272 RETVAL = 0.;
273 else
274 RETVAL = cs_third_quartile(csample, nelem);
275 Safefree(csample);
276 OUTPUT: RETVAL
277
278
279 double
280 mean(sample)
281 AV* sample
282 CODE:
283 RETVAL = cs_mean_av(aTHX_ sample);
284 OUTPUT: RETVAL
285
286
287 double
288 sample_standard_deviation(mean, sample)
289 SV* mean
290 AV* sample
291 CODE:
292 RETVAL = cs_sum_deviation_squared_av(aTHX_ SvNV(mean), sample);
293 RETVAL = pow( RETVAL / av_len(sample), 0.5 ); /* av_len() is N-1! */
294 OUTPUT: RETVAL
295
296
297 double
298 population_standard_deviation(mean, sample)
299 SV* mean
300 AV* sample
301 CODE:
302 RETVAL = cs_sum_deviation_squared_av(aTHX_ SvNV(mean), sample);
303 RETVAL = pow( RETVAL / (av_len(sample)+1), 0.5 ); /* av_len() is N-1! */
304 OUTPUT: RETVAL
305
306
307 double
308 select_kth(sample, kth)
309 AV* sample
310 I32 kth
311 PREINIT:
312 I32 nelem;
313 double* csample;
314 CODE:
315 avToCAry(aTHX_ sample, &csample, &nelem);
316 if (kth < 1 || kth > nelem) {
317 croak("Can't select %ith smallest element from a list of %i elements", kth, nelem);
318 }
319 RETVAL = cs_select(csample, nelem, kth-1);
320 Safefree(csample);
321 OUTPUT: RETVAL
322
323
324 void
325 median_simple_confidence_limits(sample, confidence...)
326 AV* sample
327 double confidence
328 PREINIT:
329 /* "confidence" is 1-alpha */
330 I32 runs, nelem, i_run;
331 double *csample, *destsample, *medians;
332 struct mt* rnd;
333 double median = 0.;
334 double lower_ci = 0.;
335 double upper_ci = 0.;
336 double alpha;
337 INIT:
338 alpha = 1.-confidence;
339 PPCODE:
340 if (items == 2)
341 runs = 1000;
342 else if (items == 3)
343 runs = SvUV(ST(2));
344 else {
345 croak("Usage: ($lower, $median, $upper) = median_confidence_limits(\\@sample, $confidence, [$nruns]);");
346 }
347 if (confidence <= 0. || confidence >= 1.) {
348 croak("Confidence level has to be in (0, 1)");
349 }
350 rnd = get_rnd(aTHX);
351 avToCAry(aTHX_ sample, &csample, &nelem);
352 if (nelem != 0) {
353 median = cs_median(csample, nelem);
354 Newx(medians, runs, double);
355 Newx(destsample, nelem, double);
356 for (i_run = 0; i_run < runs; ++i_run) {
357 do_resample(csample, nelem, rnd, destsample);
358 medians[i_run] = cs_median(destsample, nelem);
359 }
360 Safefree(destsample);
361 /* lower = t - (t*_((R+1)*(1-alpha)) - t)
362 * upper = t - (t*_((R+1)*alpha) - t)
363 */
364 lower_ci = 2.*median - cs_select( medians, runs, (I32)((runs+1.)*(1.-alpha)) );
365 upper_ci = 2.*median - cs_select( medians, runs, (I32)((runs+1.)*alpha) );
366 Safefree(medians);
367 }
368 Safefree(csample);
369 EXTEND(SP, 3);
370 mPUSHn(lower_ci);
371 mPUSHn(median);
372 mPUSHn(upper_ci);
373
374
375 void
376 simple_confidence_limits_from_samples(statistic, statistics, confidence)
377 double statistic
378 AV* statistics
379 double confidence
380 PREINIT:
381 /* "confidence" is 1-alpha */
382 I32 nelem;
383 double *cstatistics;
384 double lower_ci = 0.;
385 double upper_ci = 0.;
386 double alpha;
387 INIT:
388 alpha = 1.-confidence;
389 PPCODE:
390 if (confidence <= 0. || confidence >= 1.) {
391 croak("Confidence level has to be in (0, 1)");
392 }
393 avToCAry(aTHX_ statistics, &cstatistics, &nelem);
394 if (nelem != 0) {
395 /* lower = t - (t*_((R+1)*(1-alpha)) - t)
396 * upper = t - (t*_((R+1)*alpha) - t)
397 */
398 lower_ci = 2.*statistic - cs_select( cstatistics, nelem, (I32)((nelem+1.)*(1.-alpha)) );
399 upper_ci = 2.*statistic - cs_select( cstatistics, nelem, (I32)((nelem+1.)*alpha) );
400 }
401 Safefree(cstatistics);
402 EXTEND(SP, 3);
403 mPUSHn(lower_ci);
404 mPUSHn(statistic);
405 mPUSHn(upper_ci);
406
407
408 double
409 approx_erf(x)
410 double x
411 CODE:
412 RETVAL = cs_approx_erf(x);
413 OUTPUT: RETVAL
414
415
416 double
417 approx_erf_inv(x)
418 double x
419 CODE:
420 if (x <= 0. || x >= 1.)
421 croak("The inverse error function is defined in (0,1). %f is outside that range", x);
422 RETVAL = cs_approx_erf_inv(x);
423 OUTPUT: RETVAL
424
425
426 double
427 alpha_to_nsigma(x)
428 double x
429 CODE:
430 RETVAL = cs_alpha_to_nsigma(x);
431 OUTPUT: RETVAL
432
433
434 double
435 nsigma_to_alpha(x)
436 double x
437 CODE:
438 RETVAL = cs_nsigma_to_alpha(x);
439 OUTPUT: RETVAL
440
441