1 #define PERL_NO_GET_CONTEXT     /* we want efficiency */
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 
6 #include "ppport.h"
7 
8 #include "mt.h"
9 #include "stats.h"
10 #include <stdlib.h>
11 
12 
13 typedef struct mt * Statistics__CaseResampling__RdGen;
14 
15 void*
U32ArrayPtr(pTHX_ int n)16 U32ArrayPtr (pTHX_ int n)
17 {
18   SV * sv = sv_2mortal( NEWSV( 0, n*sizeof(U32) ) );
19   return SvPVX(sv);
20 }
21 
22 double
cs_mean_av(pTHX_ AV * sample)23 cs_mean_av(pTHX_ AV* sample)
24 {
25   I32 i, n;
26   SV** elem;
27   double sum;
28   n = av_len(sample)+1;
29   sum = 0.;
30   for (i = 0; i < n; ++i) {
31     if (NULL == (elem = av_fetch(sample, i, 0))) {
32       croak("Could not fetch element from array");
33     }
34     else
35       sum += SvNV(*elem);
36   }
37   return sum/(double)n;
38 }
39 
40 double
cs_sum_deviation_squared_av(pTHX_ double mean,AV * sample)41 cs_sum_deviation_squared_av(pTHX_ double mean, AV* sample)
42 {
43   I32 i, n;
44   SV** elem;
45   double sum;
46   n = av_len(sample)+1;
47   sum = 0.;
48   for (i = 0; i < n; ++i) {
49     if (NULL == (elem = av_fetch(sample, i, 0))) {
50       croak("Could not fetch element from array");
51     }
52     else
53       sum += pow(SvNV(*elem)-mean, 2);
54   }
55   return sum;
56 }
57 
58 void
avToCAry(pTHX_ AV * in,double ** out,I32 * n)59 avToCAry(pTHX_ AV* in, double** out, I32* n)
60 {
61   I32 thisN;
62   double* ary;
63   SV** elem;
64   I32 i;
65   thisN = av_len(in)+1;
66   *n = thisN;
67   if (thisN == 0)
68     return;
69 
70   Newx(ary, thisN, double);
71   *out = ary;
72   for (i = 0; i < thisN; ++i) {
73     if (NULL == (elem = av_fetch(in, i, 0))) {
74       Safefree(ary);
75       croak("Could not fetch element from array");
76     }
77     else
78       ary[i] = SvNV(*elem);
79   }
80 }
81 
82 void
cAryToAV(pTHX_ double * in,AV ** out,I32 n)83 cAryToAV(pTHX_ double* in, AV** out, I32 n)
84 {
85   SV* elem;
86   I32 i;
87   *out = newAV();
88   if (n == 0)
89     return;
90 
91   av_extend(*out, n-1);
92 
93   for (i = 0; i < n; ++i) {
94     elem = newSVnv(in[i]);
95     if (NULL == av_store(*out, i, elem))
96       SvREFCNT_dec(elem);
97   }
98 }
99 
100 struct mt*
get_rnd(pTHX)101 get_rnd(pTHX)
102 {
103   IV tmp;
104   SV* therndsv = get_sv("Statistics::CaseResampling::Rnd", 0);
105   if (therndsv == NULL
106       || !SvROK(therndsv)
107       || !sv_derived_from(therndsv, "Statistics::CaseResampling::RdGen"))
108   {
109     croak("Random number generator not set up!");
110   }
111   tmp = SvIV((SV*)SvRV(therndsv));
112   return INT2PTR(struct mt*, tmp);
113 }
114 
115 
116 MODULE = Statistics::CaseResampling		PACKAGE = Statistics::CaseResampling
117 PROTOTYPES: DISABLE
118 
119 INCLUDE: RdGen.xs.inc
120 
121 MODULE = Statistics::CaseResampling		PACKAGE = Statistics::CaseResampling
122 PROTOTYPES: DISABLE
123 
124 AV*
125 resample(sample)
126     AV* sample
127   PREINIT:
128     I32 nelem;
129     double* csample;
130     double* destsample;
131     struct mt* rnd;
132   CODE:
133     rnd = get_rnd(aTHX);
134     avToCAry(aTHX_ sample, &csample, &nelem);
135     if (nelem != 0) {
136       Newx(destsample, nelem, double);
137       do_resample(csample, nelem, rnd, destsample);
138       cAryToAV(aTHX_ destsample, &RETVAL, nelem);
139       Safefree(destsample);
140     }
141     else
142       RETVAL = newAV();
143     Safefree(csample);
144     sv_2mortal((SV*)RETVAL);
145   OUTPUT: RETVAL
146 
147 
148 AV*
149 resample_medians(sample, runs)
150     AV* sample
151     I32 runs
152   PREINIT:
153     I32 nelem;
154     I32 i_run;
155     double* csample;
156     double* destsample;
157     struct mt* rnd;
158   CODE:
159     rnd = get_rnd(aTHX);
160     avToCAry(aTHX_ sample, &csample, &nelem);
161     RETVAL = newAV();
162     if (nelem != 0) {
163       Newx(destsample, nelem, double);
164       av_extend(RETVAL, runs-1);
165       for (i_run = 0; i_run < runs; ++i_run) {
166         do_resample(csample, nelem, rnd, destsample);
167         av_store(RETVAL, i_run, newSVnv(cs_median(destsample, nelem)));
168       }
169       Safefree(destsample);
170     }
171     Safefree(csample);
172     sv_2mortal((SV*)RETVAL);
173   OUTPUT: RETVAL
174 
175 
176 AV*
177 resample_means(sample, runs)
178     AV* sample
179     I32 runs
180   PREINIT:
181     I32 nelem;
182     I32 i_run;
183     double* csample;
184     double* destsample;
185     struct mt* rnd;
186   CODE:
187     rnd = get_rnd(aTHX);
188     avToCAry(aTHX_ sample, &csample, &nelem);
189     RETVAL = newAV();
190     if (nelem != 0) {
191       Newx(destsample, nelem, double);
192       av_extend(RETVAL, runs-1);
193       for (i_run = 0; i_run < runs; ++i_run) {
194         do_resample(csample, nelem, rnd, destsample);
195         av_store(RETVAL, i_run, newSVnv(cs_mean(destsample, nelem)));
196       }
197       Safefree(destsample);
198     }
199     Safefree(csample);
200     sv_2mortal((SV*)RETVAL);
201   OUTPUT: RETVAL
202 
203 
204 double
205 median(sample)
206     AV* sample
207   PREINIT:
208     I32 nelem;
209     double* csample;
210   CODE:
211     avToCAry(aTHX_ sample, &csample, &nelem);
212     if (nelem == 0)
213       RETVAL = 0.;
214     else
215       RETVAL = cs_median(csample, nelem);
216     Safefree(csample);
217   OUTPUT: RETVAL
218 
219 double
220 median_absolute_deviation(sample)
221     AV* sample
222   PREINIT:
223     I32 nelem;
224     double* csample;
225   CODE:
226     avToCAry(aTHX_ sample, &csample, &nelem);
227     if (nelem == 0)
228       RETVAL = 0.;
229     else {
230       unsigned int i;
231       double* absdev;
232 
233       const double median = cs_median(csample, nelem);
234       /* in principle, I think one could write an algorithm
235        * that doesn't require mallocing the second array by inlining an
236        * O(n) median that visits each element only once? */
237       absdev = (double*)malloc(nelem * sizeof(double));
238       for (i = 0; i < nelem; ++i)
239         absdev[i] = fabs(csample[i] - median);
240       RETVAL = cs_median(absdev, nelem);
241       free(absdev);
242     }
243     Safefree(csample);
244   OUTPUT: RETVAL
245 
246 
247 double
248 first_quartile(sample)
249     AV* sample
250   PREINIT:
251     I32 nelem;
252     double* csample;
253   CODE:
254     avToCAry(aTHX_ sample, &csample, &nelem);
255     if (nelem == 0)
256       RETVAL = 0.;
257     else
258       RETVAL = cs_first_quartile(csample, nelem);
259     Safefree(csample);
260   OUTPUT: RETVAL
261 
262 
263 double
264 third_quartile(sample)
265     AV* sample
266   PREINIT:
267     I32 nelem;
268     double* csample;
269   CODE:
270     avToCAry(aTHX_ sample, &csample, &nelem);
271     if (nelem == 0)
272       RETVAL = 0.;
273     else
274       RETVAL = cs_third_quartile(csample, nelem);
275     Safefree(csample);
276   OUTPUT: RETVAL
277 
278 
279 double
280 mean(sample)
281     AV* sample
282   CODE:
283     RETVAL = cs_mean_av(aTHX_ sample);
284   OUTPUT: RETVAL
285 
286 
287 double
288 sample_standard_deviation(mean, sample)
289     SV* mean
290     AV* sample
291   CODE:
292     RETVAL =  cs_sum_deviation_squared_av(aTHX_ SvNV(mean), sample);
293     RETVAL = pow( RETVAL / av_len(sample), 0.5 ); /* av_len() is N-1! */
294   OUTPUT: RETVAL
295 
296 
297 double
298 population_standard_deviation(mean, sample)
299     SV* mean
300     AV* sample
301   CODE:
302     RETVAL =  cs_sum_deviation_squared_av(aTHX_ SvNV(mean), sample);
303     RETVAL = pow( RETVAL / (av_len(sample)+1), 0.5 ); /* av_len() is N-1! */
304   OUTPUT: RETVAL
305 
306 
307 double
308 select_kth(sample, kth)
309     AV* sample
310     I32 kth
311   PREINIT:
312     I32 nelem;
313     double* csample;
314   CODE:
315     avToCAry(aTHX_ sample, &csample, &nelem);
316     if (kth < 1 || kth > nelem) {
317       croak("Can't select %ith smallest element from a list of %i elements", kth, nelem);
318     }
319     RETVAL = cs_select(csample, nelem, kth-1);
320     Safefree(csample);
321   OUTPUT: RETVAL
322 
323 
324 void
325 median_simple_confidence_limits(sample, confidence...)
326     AV* sample
327     double confidence
328   PREINIT:
329     /* "confidence" is 1-alpha */
330     I32 runs, nelem, i_run;
331     double *csample, *destsample, *medians;
332     struct mt* rnd;
333     double median   = 0.;
334     double lower_ci = 0.;
335     double upper_ci = 0.;
336     double alpha;
337   INIT:
338     alpha = 1.-confidence;
339   PPCODE:
340     if (items == 2)
341       runs = 1000;
342     else if (items == 3)
343       runs = SvUV(ST(2));
344     else {
345       croak("Usage: ($lower, $median, $upper) = median_confidence_limits(\\@sample, $confidence, [$nruns]);");
346     }
347     if (confidence <= 0. || confidence >= 1.) {
348       croak("Confidence level has to be in (0, 1)");
349     }
350     rnd = get_rnd(aTHX);
351     avToCAry(aTHX_ sample, &csample, &nelem);
352     if (nelem != 0) {
353       median = cs_median(csample, nelem);
354       Newx(medians, runs, double);
355       Newx(destsample, nelem, double);
356       for (i_run = 0; i_run < runs; ++i_run) {
357         do_resample(csample, nelem, rnd, destsample);
358         medians[i_run] = cs_median(destsample, nelem);
359       }
360       Safefree(destsample);
361       /* lower = t - (t*_((R+1)*(1-alpha)) - t)
362        * upper = t - (t*_((R+1)*alpha) - t)
363        */
364       lower_ci = 2.*median - cs_select( medians, runs, (I32)((runs+1.)*(1.-alpha)) );
365       upper_ci = 2.*median - cs_select( medians, runs, (I32)((runs+1.)*alpha) );
366       Safefree(medians);
367     }
368     Safefree(csample);
369     EXTEND(SP, 3);
370     mPUSHn(lower_ci);
371     mPUSHn(median);
372     mPUSHn(upper_ci);
373 
374 
375 void
376 simple_confidence_limits_from_samples(statistic, statistics, confidence)
377     double statistic
378     AV* statistics
379     double confidence
380   PREINIT:
381     /* "confidence" is 1-alpha */
382     I32 nelem;
383     double *cstatistics;
384     double lower_ci = 0.;
385     double upper_ci = 0.;
386     double alpha;
387   INIT:
388     alpha = 1.-confidence;
389   PPCODE:
390     if (confidence <= 0. || confidence >= 1.) {
391       croak("Confidence level has to be in (0, 1)");
392     }
393     avToCAry(aTHX_ statistics, &cstatistics, &nelem);
394     if (nelem != 0) {
395       /* lower = t - (t*_((R+1)*(1-alpha)) - t)
396        * upper = t - (t*_((R+1)*alpha) - t)
397        */
398       lower_ci = 2.*statistic - cs_select( cstatistics, nelem, (I32)((nelem+1.)*(1.-alpha)) );
399       upper_ci = 2.*statistic - cs_select( cstatistics, nelem, (I32)((nelem+1.)*alpha) );
400     }
401     Safefree(cstatistics);
402     EXTEND(SP, 3);
403     mPUSHn(lower_ci);
404     mPUSHn(statistic);
405     mPUSHn(upper_ci);
406 
407 
408 double
409 approx_erf(x)
410     double x
411   CODE:
412     RETVAL = cs_approx_erf(x);
413   OUTPUT: RETVAL
414 
415 
416 double
417 approx_erf_inv(x)
418     double x
419   CODE:
420     if (x <= 0. || x >= 1.)
421       croak("The inverse error function is defined in (0,1). %f is outside that range", x);
422     RETVAL = cs_approx_erf_inv(x);
423   OUTPUT: RETVAL
424 
425 
426 double
427 alpha_to_nsigma(x)
428     double x
429   CODE:
430     RETVAL = cs_alpha_to_nsigma(x);
431   OUTPUT: RETVAL
432 
433 
434 double
435 nsigma_to_alpha(x)
436     double x
437   CODE:
438     RETVAL = cs_nsigma_to_alpha(x);
439   OUTPUT: RETVAL
440 
441