1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 2012-2019  The R Core Team
4  *  Copyright (C) 2003       The R Foundation
5  *  Copyright (C) 1995-2002  Martin Maechler <maechler@stat.math.ethz.ch>
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, a copy is available at
19  *  https://www.R-project.org/Licenses/
20  */
21 
22 #include "modreg.h"
23 
24 // Large value, to replace { NaN | NA } values with for NA_BIG_alternate_* :
25 static double
26 BIG_dbl = 8.888888888e307;
27 //          1 3 5 7 9, // ~ < 2^1023 [such that +/- 2*BIG_dbl is still normalized]
28 
29 enum { NA_BIG_alternate_P = 1, NA_BIG_alternate_M, NA_OMIT, NA_FAIL };
30 // == 1,2,..., defined by order in formals(runmed)$na.action -->  ../R/runmed.R
31 
32 #include "Trunmed.c"
33 //        ---------
34 
Srunmed(const double * y,double * smo,R_xlen_t n,int bw,int end_rule,int print_level)35 static void Srunmed(const double* y, double* smo, R_xlen_t n, int bw,
36 		    int end_rule, int print_level)
37 {
38 /*
39  *  Computes "Running Median" smoother ("Stuetzle" algorithm) with medians of 'band'
40 
41  *  Input:
42  *	y(n)	- responses in order of increasing predictor values
43  *	n	- number of observations
44  *	bw	- span of running medians (MUST be ODD !!)
45  *	end_rule -- 0: Keep original data at ends {j; j < b2 | j > n-b2}
46  *		 -- 1: Constant ends = median(y[1],..,y[bw]) "robust"
47  *  Output:
48  *	smo(n)	- smoothed responses
49 
50  * NOTE:  The 'end' values are just copied !! this is fast but not too nice !
51  */
52 
53 /* Local variables */
54     double rmed, rmin, temp, rnew, yout, yi;
55     double rbe, rtb, rse, yin, rts;
56     int imin, ismo, i, j, first, last, band2, kminus, kplus;
57 
58 
59     double *scrat = (double *) R_alloc(bw, sizeof(double));
60     /*was  malloc( (unsigned) bw * sizeof(double));*/
61 
62     if(bw > n)
63 	error(_("bandwidth/span of running medians is larger than n"));
64 
65 /* 1. Compute  'rmed' := Median of the first 'band' values
66    ======================================================== */
67 
68     for (int i = 0; i < bw; ++i)
69 	scrat[i] = y[i];
70 
71     /* find minimal value  rmin = scrat[imin] <= scrat[j] */
72     rmin = scrat[0]; imin = 0;
73     for (int i = 1; i < bw; ++i)
74 	if (scrat[i] < rmin) {
75 	    rmin = scrat[i]; imin = i;
76 	}
77     /* swap scrat[0] <-> scrat[imin] */
78     temp = scrat[0]; scrat[0] = rmin; scrat[imin] = temp;
79     /* sort the rest of of scrat[] by bubble (?) sort -- */
80     for (int i = 2; i < bw; ++i) {
81 	if (scrat[i] < scrat[i - 1]) {/* find the proper place for scrat[i] */
82 	    temp = scrat[i];
83 	    j = i;
84 	    do {
85 		scrat[j] = scrat[j - 1];
86 		--j;
87 	    } while (scrat[j - 1] > temp); /* now:  scrat[j-1] <= temp */
88 	    scrat[j] = temp;
89 	}
90     }
91 
92     band2 = bw / 2;
93     rmed = scrat[band2];/* == Median( y[(1:band2)-1] ) */
94     /* "malloc" had  free( (char*) scrat);*/ /*-- release scratch memory --*/
95 
96     if(end_rule == 0) { /*-- keep DATA at end values */
97 	for (i = 0; i < band2; ++i)
98 	    smo[i] = y[i];
99     }
100     else { /* if(end_rule == 1)  copy median to CONSTANT end values */
101 	for (i = 0; i < band2; ++i)
102 	    smo[i] = rmed;
103     }
104     smo[band2] = rmed;
105     band2++; /* = bw / 2 + 1*/;
106 
107     if(print_level >= 1) REprintf("(bw,b2)= (%d,%d)\n", bw, band2);
108 
109 /* Big	FOR Loop: RUNNING median, update the median 'rmed'
110    ======================================================= */
111     for (first = 1, last = bw, ismo = band2;
112 	 last < n;
113 	 ++first, ++last, ++ismo) {
114 
115 	yin = y[last];
116 	yout = y[first - 1];
117 
118 	if(print_level >= 2) REprintf(" is=%d, y(in/out)= %10g, %10g", ismo, yin, yout);
119 
120 	rnew = rmed; /* New median = old one   in all the simple cases --*/
121 
122 	if (yin < rmed) {
123 	    if (yout >= rmed) {
124 		kminus = 0;
125 		if (yout > rmed) {/*	--- yin < rmed < yout --- */
126 		    if(print_level >= 2) REprintf(": yin < rmed < yout ");
127 		    rnew = yin;/* was -rinf */
128 		    for (i = first; i <= last; ++i) {
129 			yi = y[i];
130 			if (yi < rmed) {
131 			    ++kminus;
132 			    if (yi > rnew)	rnew = yi;
133 			}
134 		    }
135 		    if (kminus < band2)		rnew = rmed;
136 		}
137 		else {/*		--- yin < rmed = yout --- */
138 		    if(print_level >= 2) REprintf(": yin < rmed == yout ");
139 		    rse = rts = yin;/* was -rinf */
140 		    for (i = first; i <= last; ++i) {
141 			yi = y[i];
142 			if (yi <= rmed) {
143 			    if (yi < rmed) {
144 				++kminus;
145 				if (yi > rts)	rts = yi;
146 				if (yi > rse)	rse = yi;
147 			    } else		rse = yi;
148 
149 			}
150 		    }
151 		    rnew = (kminus == band2) ? rts : rse ;
152 		    if(print_level >= 2) REprintf("k- : %d,", kminus);
153 		}
154 	    } /* else: both  yin, yout < rmed -- nothing to do .... */
155 	}
156 	else if (yin != rmed) { /* yin > rmed */
157 	    if (yout <= rmed) {
158 		kplus = 0;
159 		if (yout < rmed) {/* -- yout < rmed < yin --- */
160 		    if(print_level >= 2) REprintf(": yout < rmed < yin ");
161 		    rnew = yin; /* was rinf */
162 		    for (i = first; i <= last; ++i) {
163 			yi = y[i];
164 			if (yi > rmed) {
165 			    ++kplus;
166 			    if (yi < rnew)	rnew = yi;
167 			}
168 		    }
169 		    if (kplus < band2)	rnew = rmed;
170 
171 		} else { /* -- yout = rmed < yin --- */
172 		    if(print_level >= 2) REprintf(": yout == rmed < yin ");
173 		    rbe = rtb = yin; /* was rinf */
174 		    for (i = first; i <= last; ++i) {
175 			yi = y[i];
176 			if (yi >= rmed) {
177 			    if (yi > rmed) {
178 				++kplus;
179 				if (yi < rtb)	rtb = yi;
180 				if (yi < rbe)	rbe = yi;
181 			    } else		rbe = yi;
182 			}
183 		    }
184 		    rnew = (kplus == band2) ? rtb : rbe;
185 		    if(print_level >= 2) REprintf("k+ : %d,", kplus);
186 		}
187 	    } /* else: both  yin, yout > rmed --> nothing to do */
188 	} /* else: yin == rmed -- nothing to do .... */
189 	if(print_level >= 2) REprintf("=> %12g, %12g\n", rmed, rnew);
190 	rmed = rnew;
191 	smo[ismo] = rmed;
192     } /*     END FOR ------------ big Loop -------------------- */
193 
194     if(end_rule == 0) { /*-- keep DATA at end values */
195 	for (i = ismo; i < n; ++i)
196 	    smo[i] = y[i];
197     }
198     else { /* if(end_rule == 1)  copy median to CONSTANT end values */
199 	for (i = ismo; i < n; ++i)
200 	    smo[i] = rmed;
201     }
202 } /* Srunmed */
203 
204 // anyNA() like [ see ../../../main/coerce.c ]
205 static
R_firstNA_dbl(const double x[],R_xlen_t n)206 R_xlen_t R_firstNA_dbl(const double x[], R_xlen_t n) {
207     for(R_xlen_t k = 0; k < n; k++)
208 	if(ISNAN(x[k]))
209 	    return k+1; // 1-index
210     return 0; // no NaN|NA found
211 }
212 
213 // .Call()ed from ../R/runmed.R
runmed(SEXP sx,SEXP stype,SEXP sk,SEXP end,SEXP naAct,SEXP printLev)214 SEXP runmed(SEXP sx, SEXP stype, SEXP sk, SEXP end, SEXP naAct, SEXP printLev)
215 {
216     if (TYPEOF(sx) != REALSXP) error("numeric 'x' required");
217     double *x = REAL(sx), *xx;
218     R_xlen_t n = XLENGTH(sx);
219     int type        = asInteger(stype),
220 	k           = asInteger(sk),
221 	end_rule    = asInteger(end),
222 	na_action   = asInteger(naAct),
223 	print_level = asInteger(printLev);
224     R_xlen_t
225 	firstNA = R_firstNA_dbl(x, n),
226 	nn = n;
227     if(print_level)
228 	Rprintf("firstNA = %d%s.\n", firstNA,
229 		(firstNA == 0) ? " <=> *no* NA/NaN" : "");
230     if(firstNA) { // anyNA(x)
231 	Rboolean NA_pos = TRUE;
232 	switch(na_action) {
233         case NA_BIG_alternate_M:
234 	    NA_pos = FALSE; // <<-- "M"inus: *not* positive
235 	    // no break; --> continue
236         case NA_BIG_alternate_P: {
237 	    xx = (double *) R_alloc(n, sizeof(double));
238 	    for(R_xlen_t i=0; i < n; i++) {
239 		if(ISNAN(x[i])) {
240 		    // replace NaN with +/- BIG (< Inf!), switching sign every time:
241 		    xx[i] = (NA_pos ? BIG_dbl : -BIG_dbl);
242 		    NA_pos = !NA_pos; // switch
243 		} else
244 		    xx[i] = x[i];
245 	    }
246 	    break;
247 	}
248 	case NA_OMIT: {
249 	    R_xlen_t i1 = firstNA-1; // firstNA is "1-index"
250 	    // xx <- x[!is.na(x)] :
251 	    xx = (double *) R_alloc(n-1, sizeof(double)); // too much if sum(is.na(.)) > 1
252 	    if(i1 > 1) Memcpy(xx, x, i1-1);
253 	    for(R_xlen_t i=i1, ix=i; i < n; i++) { //  i-ix == n-nn  {identity}
254 		if(ISNAN(x[i])) // drop NA/NaN and shift all to the left by 1 :
255 		    nn--; // nn + (i-ix) == n
256 		else
257 		    xx[ix++] = x[i];
258 	    } // --> now  xx[1:nn] == x[!is.na(x)]
259 	    break;
260 	}
261 	case NA_FAIL:
262 	    error(_("runmed(x, .., na.action=\"na.fail\"): have NAs starting at x[%ld]"),
263 		  firstNA);
264 	default:
265 	    error(_("runmed(): invalid 'na.action'"));
266 	}
267 
268     } else { // no NAs: xx just points to x; wont be modified
269 	xx = x;
270     }
271 
272     SEXP ans = PROTECT(allocVector(REALSXP, n));
273 
274     if (type == 1) {
275 	if (IS_LONG_VEC(sx))
276 	    error("long vectors are not supported for algorithm = \"Turlach\"");
277 	Trunmed(xx, REAL(ans), nn, k, end_rule, print_level);
278     } else {
279 	Srunmed(xx, REAL(ans), nn, k, end_rule, print_level);
280     }
281     if(firstNA) {
282 	double *median = REAL(ans);
283 	switch(na_action) {
284         case NA_BIG_alternate_P:
285         case NA_BIG_alternate_M: { // revert the +-BIG replacements
286 	    for(R_xlen_t i=firstNA-1; i < n; i++)
287 		if(ISNAN(x[i]) && !ISNAN(median[i]) && fabs(median[i]) == BIG_dbl)
288 		    median[i] = x[i];
289 	    break;
290 	}
291 	case NA_OMIT: { /* fill the shortened median[] series into the result,
292 			   putting x[i] into places i where  ISNAN(x[i]) */
293 	    if(print_level) {
294 		Rprintf("na.omit: reduced n = nn = %d.\n", nn);
295 		if(print_level >= 2) {
296 		    Rprintf("median[] = ");
297 		    for(R_xlen_t i=0; i < nn; i++) {
298 			if(i % 20 == 0) Rprintf("\n[%d] ", i);
299 			Rprintf("%5g", median[i]);
300 		    }
301 		    Rprintf("\n");
302 		}
303 	    }
304 	    double *med = (double *)R_alloc(nn, sizeof(double));
305 	    if(nn) Memcpy(med, median, nn);
306 	    for(R_xlen_t i=firstNA-1, ix=i; i < n; i++) {
307 		if(ISNAN(x[i])) {
308 		    median[i] = x[i];
309 		} else {
310 		    median[i] = med[ix++];
311 		}
312 	    }
313 	    break;
314 	}
315 	default: error(_("na_action logic error (%d), please report!"), na_action);
316 	}
317     }
318     UNPROTECT(1);
319     return ans;
320 }
321