1 /***************************************************************************
2  TEMPLATE:
3   binMeans_<L|R>(...)
4 
5  GENERATES:
6   void binMeans_L(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count)
7   void binMeans_R(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count)
8 
9  Arguments:
10    The following macros ("arguments") should be defined for the
11    template to work as intended.
12   - BIN_BY: 'L' or 'R'
13 
14  Copyright Henrik Bengtsson, 2012-2013
15  **************************************************************************/
16 #include "000.types.h"
17 
18 #if BIN_BY == 'L'   /* [u,v) */
19   #define METHOD_NAME binMeans_L
20   #define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0)
21   #define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1)
22 #elif BIN_BY == 'R' /* (u,v] */
23   #define METHOD_NAME binMeans_R
24   #define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0)
25   #define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1)
26 #endif
27 
28 
METHOD_NAME(double * y,R_xlen_t ny,double * x,R_xlen_t nx,double * bx,R_xlen_t nbins,double * ans,int * count)29 void METHOD_NAME(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) {
30   R_xlen_t ii = 0, jj = 0, iStart=0;
31   R_xlen_t n = 0;
32   LDOUBLE sum = 0.0;
33   int warn = 0;
34 
35   // Count?
36   if (nbins > 0) {
37 
38     // Skip to the first bin
39     while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) {
40       ++iStart;
41     }
42 
43     // For each x...
44     for (ii = iStart; ii < nx; ++ii) {
45 
46       // Skip to a new bin?
47       while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) {
48         // Update statistic of current bin?
49         if (count) {
50           /* Although unlikely, with long vectors the count for a bin
51              can become greater than what is possible to represent by
52              an integer.  Detect and warn about this. */
53           if (n > R_INT_MAX) {
54             warn = 1;
55             count[jj] = R_INT_MAX;
56           } else {
57             count[jj] = n;
58           }
59         }
60         ans[jj] = n > 0 ? sum / n : R_NaN;
61         sum = 0.0;
62         n = 0;
63 
64         // ...and move to next
65         ++jj;
66 
67         // No more bins?
68         if (jj >= nbins) {
69           // Make the outer for-loop to exit...
70           ii = nx - 1;
71           // ...but correct for the fact that the y[nx-1] point will
72           // be incorrectly added to the sum.  Doing the correction
73           // here avoids an if (ii < nx) sum += y[ii] below.
74           sum -= y[ii];
75           break;
76         }
77       }
78 
79       // Sum and count
80       sum += y[ii];
81       ++n;
82 
83       /* Early LDOUBLE stopping? */
84       if (n % 1048576 == 0 && !R_FINITE(sum)) break;
85     }
86 
87     // Update statistic of the last bin?
88     if (jj < nbins) {
89       if (count) {
90         /* Although unlikely, with long vectors the count for a bin
91            can become greater than what is possible to represent by
92            an integer.  Detect and warn about this. */
93         if (n > R_INT_MAX) {
94           warn= 1;
95           count[jj] = R_INT_MAX;
96         } else {
97           count[jj] = n;
98         }
99       }
100       ans[jj] = n > 0 ? sum / n : R_NaN;
101 
102       // Assign the remaining bins to zero counts and missing mean values
103       while (++jj < nbins) {
104         ans[jj] = R_NaN;
105         if (count) count[jj] = 0;
106       }
107     }
108 
109   } // if (nbins > 0)
110 
111   if (warn) {
112     warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX);
113   }
114 }
115 
116 
117 /* Undo template macros */
118 #undef BIN_BY
119 #undef IS_PART_OF_FIRST_BIN
120 #undef IS_PART_OF_NEXT_BIN
121 #include "000.templates-types_undef.h"
122 
123 
124 
125 /***************************************************************************
126  HISTORY:
127 2014-11-07 [HB]
128   o ROBUSTNESS: Added protection for integer overflow in bin counts.
129 2014-11-06 [HB]
130   o CLEANUP: Moving away from R data types in low-level C functions.
131 2014-10-01 [HB]
132   o BUG FIX: binMeans() returned 0.0 instead of NA_real_ for empty bins.
133 2014-04-04 [HB]
134   o BUG FIX: The native code of binMeans(x, bx) would try to access
135     an out-of-bounds value of argument 'y' iff 'x' contained elements
136     that are left of all bins in 'bx'.  This bug had no impact on the
137     results and since no assignment was done it should also not crash/
138     core dump R.  This was discovered thanks to new memtests (ASAN and
139     valgrind) provided by CRAN.
140  2013-10-08 [HB]
141   o Created template for binMeans_<L|R>() to create functions that
142     bin either by [u,v) or (u,v].
143  2013-05-10 [HB]
144   o SPEEDUP: binMeans() no longer tests in every iteration (=for every
145     data point) whether the last bin has been reached or not.
146  2012-10-10 [HB]
147   o BUG FIX: binMeans() would return random/garbage means/counts for
148     bins that were beyond the last data point.
149   o BUG FIX: In some cases binMeans() could try to go past the last bin.
150  2012-10-03 [HB]
151   o Created binMeans(), which was adopted from from code proposed by
152     Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as
153     a reply to HB's R-devel thread 'Fastest non-overlapping binning mean
154     function out there?' on Oct 3, 2012.
155  **************************************************************************/
156