1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 2012-2019 The R Core Team
4 * Copyright (C) 2003 The R Foundation
5 * Copyright (C) 1995-2002 Martin Maechler <maechler@stat.math.ethz.ch>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, a copy is available at
19 * https://www.R-project.org/Licenses/
20 */
21
22 #include "modreg.h"
23
24 // Large value, to replace { NaN | NA } values with for NA_BIG_alternate_* :
25 static double
26 BIG_dbl = 8.888888888e307;
27 // 1 3 5 7 9, // ~ < 2^1023 [such that +/- 2*BIG_dbl is still normalized]
28
29 enum { NA_BIG_alternate_P = 1, NA_BIG_alternate_M, NA_OMIT, NA_FAIL };
30 // == 1,2,..., defined by order in formals(runmed)$na.action --> ../R/runmed.R
31
32 #include "Trunmed.c"
33 // ---------
34
Srunmed(const double * y,double * smo,R_xlen_t n,int bw,int end_rule,int print_level)35 static void Srunmed(const double* y, double* smo, R_xlen_t n, int bw,
36 int end_rule, int print_level)
37 {
38 /*
39 * Computes "Running Median" smoother ("Stuetzle" algorithm) with medians of 'band'
40
41 * Input:
42 * y(n) - responses in order of increasing predictor values
43 * n - number of observations
44 * bw - span of running medians (MUST be ODD !!)
45 * end_rule -- 0: Keep original data at ends {j; j < b2 | j > n-b2}
46 * -- 1: Constant ends = median(y[1],..,y[bw]) "robust"
47 * Output:
48 * smo(n) - smoothed responses
49
50 * NOTE: The 'end' values are just copied !! this is fast but not too nice !
51 */
52
53 /* Local variables */
54 double rmed, rmin, temp, rnew, yout, yi;
55 double rbe, rtb, rse, yin, rts;
56 int imin, ismo, i, j, first, last, band2, kminus, kplus;
57
58
59 double *scrat = (double *) R_alloc(bw, sizeof(double));
60 /*was malloc( (unsigned) bw * sizeof(double));*/
61
62 if(bw > n)
63 error(_("bandwidth/span of running medians is larger than n"));
64
65 /* 1. Compute 'rmed' := Median of the first 'band' values
66 ======================================================== */
67
68 for (int i = 0; i < bw; ++i)
69 scrat[i] = y[i];
70
71 /* find minimal value rmin = scrat[imin] <= scrat[j] */
72 rmin = scrat[0]; imin = 0;
73 for (int i = 1; i < bw; ++i)
74 if (scrat[i] < rmin) {
75 rmin = scrat[i]; imin = i;
76 }
77 /* swap scrat[0] <-> scrat[imin] */
78 temp = scrat[0]; scrat[0] = rmin; scrat[imin] = temp;
79 /* sort the rest of of scrat[] by bubble (?) sort -- */
80 for (int i = 2; i < bw; ++i) {
81 if (scrat[i] < scrat[i - 1]) {/* find the proper place for scrat[i] */
82 temp = scrat[i];
83 j = i;
84 do {
85 scrat[j] = scrat[j - 1];
86 --j;
87 } while (scrat[j - 1] > temp); /* now: scrat[j-1] <= temp */
88 scrat[j] = temp;
89 }
90 }
91
92 band2 = bw / 2;
93 rmed = scrat[band2];/* == Median( y[(1:band2)-1] ) */
94 /* "malloc" had free( (char*) scrat);*/ /*-- release scratch memory --*/
95
96 if(end_rule == 0) { /*-- keep DATA at end values */
97 for (i = 0; i < band2; ++i)
98 smo[i] = y[i];
99 }
100 else { /* if(end_rule == 1) copy median to CONSTANT end values */
101 for (i = 0; i < band2; ++i)
102 smo[i] = rmed;
103 }
104 smo[band2] = rmed;
105 band2++; /* = bw / 2 + 1*/;
106
107 if(print_level >= 1) REprintf("(bw,b2)= (%d,%d)\n", bw, band2);
108
109 /* Big FOR Loop: RUNNING median, update the median 'rmed'
110 ======================================================= */
111 for (first = 1, last = bw, ismo = band2;
112 last < n;
113 ++first, ++last, ++ismo) {
114
115 yin = y[last];
116 yout = y[first - 1];
117
118 if(print_level >= 2) REprintf(" is=%d, y(in/out)= %10g, %10g", ismo, yin, yout);
119
120 rnew = rmed; /* New median = old one in all the simple cases --*/
121
122 if (yin < rmed) {
123 if (yout >= rmed) {
124 kminus = 0;
125 if (yout > rmed) {/* --- yin < rmed < yout --- */
126 if(print_level >= 2) REprintf(": yin < rmed < yout ");
127 rnew = yin;/* was -rinf */
128 for (i = first; i <= last; ++i) {
129 yi = y[i];
130 if (yi < rmed) {
131 ++kminus;
132 if (yi > rnew) rnew = yi;
133 }
134 }
135 if (kminus < band2) rnew = rmed;
136 }
137 else {/* --- yin < rmed = yout --- */
138 if(print_level >= 2) REprintf(": yin < rmed == yout ");
139 rse = rts = yin;/* was -rinf */
140 for (i = first; i <= last; ++i) {
141 yi = y[i];
142 if (yi <= rmed) {
143 if (yi < rmed) {
144 ++kminus;
145 if (yi > rts) rts = yi;
146 if (yi > rse) rse = yi;
147 } else rse = yi;
148
149 }
150 }
151 rnew = (kminus == band2) ? rts : rse ;
152 if(print_level >= 2) REprintf("k- : %d,", kminus);
153 }
154 } /* else: both yin, yout < rmed -- nothing to do .... */
155 }
156 else if (yin != rmed) { /* yin > rmed */
157 if (yout <= rmed) {
158 kplus = 0;
159 if (yout < rmed) {/* -- yout < rmed < yin --- */
160 if(print_level >= 2) REprintf(": yout < rmed < yin ");
161 rnew = yin; /* was rinf */
162 for (i = first; i <= last; ++i) {
163 yi = y[i];
164 if (yi > rmed) {
165 ++kplus;
166 if (yi < rnew) rnew = yi;
167 }
168 }
169 if (kplus < band2) rnew = rmed;
170
171 } else { /* -- yout = rmed < yin --- */
172 if(print_level >= 2) REprintf(": yout == rmed < yin ");
173 rbe = rtb = yin; /* was rinf */
174 for (i = first; i <= last; ++i) {
175 yi = y[i];
176 if (yi >= rmed) {
177 if (yi > rmed) {
178 ++kplus;
179 if (yi < rtb) rtb = yi;
180 if (yi < rbe) rbe = yi;
181 } else rbe = yi;
182 }
183 }
184 rnew = (kplus == band2) ? rtb : rbe;
185 if(print_level >= 2) REprintf("k+ : %d,", kplus);
186 }
187 } /* else: both yin, yout > rmed --> nothing to do */
188 } /* else: yin == rmed -- nothing to do .... */
189 if(print_level >= 2) REprintf("=> %12g, %12g\n", rmed, rnew);
190 rmed = rnew;
191 smo[ismo] = rmed;
192 } /* END FOR ------------ big Loop -------------------- */
193
194 if(end_rule == 0) { /*-- keep DATA at end values */
195 for (i = ismo; i < n; ++i)
196 smo[i] = y[i];
197 }
198 else { /* if(end_rule == 1) copy median to CONSTANT end values */
199 for (i = ismo; i < n; ++i)
200 smo[i] = rmed;
201 }
202 } /* Srunmed */
203
204 // anyNA() like [ see ../../../main/coerce.c ]
205 static
R_firstNA_dbl(const double x[],R_xlen_t n)206 R_xlen_t R_firstNA_dbl(const double x[], R_xlen_t n) {
207 for(R_xlen_t k = 0; k < n; k++)
208 if(ISNAN(x[k]))
209 return k+1; // 1-index
210 return 0; // no NaN|NA found
211 }
212
213 // .Call()ed from ../R/runmed.R
runmed(SEXP sx,SEXP stype,SEXP sk,SEXP end,SEXP naAct,SEXP printLev)214 SEXP runmed(SEXP sx, SEXP stype, SEXP sk, SEXP end, SEXP naAct, SEXP printLev)
215 {
216 if (TYPEOF(sx) != REALSXP) error("numeric 'x' required");
217 double *x = REAL(sx), *xx;
218 R_xlen_t n = XLENGTH(sx);
219 int type = asInteger(stype),
220 k = asInteger(sk),
221 end_rule = asInteger(end),
222 na_action = asInteger(naAct),
223 print_level = asInteger(printLev);
224 R_xlen_t
225 firstNA = R_firstNA_dbl(x, n),
226 nn = n;
227 if(print_level)
228 Rprintf("firstNA = %d%s.\n", firstNA,
229 (firstNA == 0) ? " <=> *no* NA/NaN" : "");
230 if(firstNA) { // anyNA(x)
231 Rboolean NA_pos = TRUE;
232 switch(na_action) {
233 case NA_BIG_alternate_M:
234 NA_pos = FALSE; // <<-- "M"inus: *not* positive
235 // no break; --> continue
236 case NA_BIG_alternate_P: {
237 xx = (double *) R_alloc(n, sizeof(double));
238 for(R_xlen_t i=0; i < n; i++) {
239 if(ISNAN(x[i])) {
240 // replace NaN with +/- BIG (< Inf!), switching sign every time:
241 xx[i] = (NA_pos ? BIG_dbl : -BIG_dbl);
242 NA_pos = !NA_pos; // switch
243 } else
244 xx[i] = x[i];
245 }
246 break;
247 }
248 case NA_OMIT: {
249 R_xlen_t i1 = firstNA-1; // firstNA is "1-index"
250 // xx <- x[!is.na(x)] :
251 xx = (double *) R_alloc(n-1, sizeof(double)); // too much if sum(is.na(.)) > 1
252 if(i1 > 1) Memcpy(xx, x, i1-1);
253 for(R_xlen_t i=i1, ix=i; i < n; i++) { // i-ix == n-nn {identity}
254 if(ISNAN(x[i])) // drop NA/NaN and shift all to the left by 1 :
255 nn--; // nn + (i-ix) == n
256 else
257 xx[ix++] = x[i];
258 } // --> now xx[1:nn] == x[!is.na(x)]
259 break;
260 }
261 case NA_FAIL:
262 error(_("runmed(x, .., na.action=\"na.fail\"): have NAs starting at x[%ld]"),
263 firstNA);
264 default:
265 error(_("runmed(): invalid 'na.action'"));
266 }
267
268 } else { // no NAs: xx just points to x; wont be modified
269 xx = x;
270 }
271
272 SEXP ans = PROTECT(allocVector(REALSXP, n));
273
274 if (type == 1) {
275 if (IS_LONG_VEC(sx))
276 error("long vectors are not supported for algorithm = \"Turlach\"");
277 Trunmed(xx, REAL(ans), nn, k, end_rule, print_level);
278 } else {
279 Srunmed(xx, REAL(ans), nn, k, end_rule, print_level);
280 }
281 if(firstNA) {
282 double *median = REAL(ans);
283 switch(na_action) {
284 case NA_BIG_alternate_P:
285 case NA_BIG_alternate_M: { // revert the +-BIG replacements
286 for(R_xlen_t i=firstNA-1; i < n; i++)
287 if(ISNAN(x[i]) && !ISNAN(median[i]) && fabs(median[i]) == BIG_dbl)
288 median[i] = x[i];
289 break;
290 }
291 case NA_OMIT: { /* fill the shortened median[] series into the result,
292 putting x[i] into places i where ISNAN(x[i]) */
293 if(print_level) {
294 Rprintf("na.omit: reduced n = nn = %d.\n", nn);
295 if(print_level >= 2) {
296 Rprintf("median[] = ");
297 for(R_xlen_t i=0; i < nn; i++) {
298 if(i % 20 == 0) Rprintf("\n[%d] ", i);
299 Rprintf("%5g", median[i]);
300 }
301 Rprintf("\n");
302 }
303 }
304 double *med = (double *)R_alloc(nn, sizeof(double));
305 if(nn) Memcpy(med, median, nn);
306 for(R_xlen_t i=firstNA-1, ix=i; i < n; i++) {
307 if(ISNAN(x[i])) {
308 median[i] = x[i];
309 } else {
310 median[i] = med[ix++];
311 }
312 }
313 break;
314 }
315 default: error(_("na_action logic error (%d), please report!"), na_action);
316 }
317 }
318 UNPROTECT(1);
319 return ans;
320 }
321