1 /*
2  * Copyright (C) 2012-2019  The R Core Team
3  * Copyright (C) 2003 ff.   The R Foundation
4  * Copyright (C) 2000-2 Martin Maechler <maechler@stat.math.ethz.ch>
5  * Copyright (C) 1995   Berwin A. Turlach <berwin@alphasun.anu.edu.au>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11 
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
15  * GNU General Public License for more details.
16 
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, a copy is available at
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 
22 /* These routines implement a running median smoother according to the
23  * algorithm described in Haerdle und Steiger (1995),
24  *			  DOI:10.2307/2986349 , see ../man/runmed.Rd
25  *
26  * The current implementation does not use any global variables!
27  */
28 
29 /* Changes for R port by Martin Maechler ((C) above):
30  *
31  *  s/long/int/			R uses int, not long (as S does)
32  *  s/void/static void/		most routines are internal
33  *
34  * - Added  print_level  and end_rule  arguments
35  * - Allow to deal with  NA / NaN {via ISNAN()}
36  */
37 
38 /* Variable	name	descri- | Identities from paper
39  * name here	paper	ption   | (1-indexing)
40  * ---------    -----	-----------------------------------
41  * window[]      H      the array containing the double heap
42  * data[]        X      the data (left intact)
43  * ...		 i	1st permuter  H[i[m]]    == X[i + m]
44  * ...		 j	2nd permuter  X[i +j[m]] == H[m]
45  */
46 
47 #include <math.h>
48 #include <R_ext/RS.h>	       	/* for Memcpy */
49 
50 static void
swap(int l,int r,double * window,int * outlist,int * nrlist,int print_level)51 swap(int l, int r, double *window, int *outlist, int *nrlist, int print_level)
52 {
53     /* swap positions `l' and `r' in window[] and nrlist[]
54      *
55      * ---- Used in R_heapsort() and many other routines
56      */
57     if(print_level >= 3) Rprintf(" SW(%d,%d) ", l,r);
58     double tmp = window[l]; window[l] = window[r];  window[r] = tmp;
59     int nl = nrlist[l],
60 	nr = nrlist[r];
61     nrlist[l] = nr; outlist[nr] = l;
62     nrlist[r] = nl; outlist[nl] = r;
63 }
64 
65 //------------------------ 1. inittree() and auxiliaries ----------------------
66 static void
siftup(int l,int r,double * window,int * outlist,int * nrlist,int print_level)67 siftup(int l, int r, double *window, int *outlist, int *nrlist, int print_level)
68 {
69     /* Used only in R_heapsort() */
70     int i = l, j,
71 	nrold = nrlist[i];
72     double x  = window[i];
73     if(print_level >= 2) Rprintf("siftup(%d,%d): x=%g: ", l,r, x);
74     while ((j = 2*i) <= r) {
75 	if (j < r)
76 	    if (window[j] < window[j+1])
77 		j++;
78 	if (x >= window[j])
79 	    break;
80 
81 	window[i]	   = window[j];
82 	outlist[nrlist[j]] = i;
83 	nrlist[i]	   = nrlist[j];
84 	i = j;
85     }
86     window[i]	   = x;
87     outlist[nrold] = i;
88     nrlist[i]	   = nrold;
89     if(print_level >= 2) Rprintf("-> nrlist[i=%d] := %d\n", i, nrold);
90 }
91 
92 static void
R_heapsort(int low,int up,double * window,int * outlist,int * nrlist,int print_level)93 R_heapsort(int low, int up, double *window, int *outlist, int *nrlist,
94 	   int print_level)
95 {
96     int l = (up/2) + 1,
97 	u = up;
98     if(print_level)
99 	Rprintf("R_heapsort(%d, %d,..): l=%d:\n", low, up, l);
100     while(l > low) {
101 	if(print_level >= 2) Rprintf(" l > low: ");
102 	l--; // l >= low :
103 	siftup(l, u, window, outlist, nrlist, print_level);
104     } // => l <= low
105     while(u > low) {
106 	if(print_level >= 2) Rprintf(" u > low: ");
107 	swap(l, u, window, outlist, nrlist, print_level);
108 	u--; // u >= low :
109 	siftup(l, u, window, outlist, nrlist, print_level);
110     }
111 }
112 
113 static void
inittree(R_xlen_t n,int k,int k2,const double * data,double * window,int * outlist,int * nrlist,int print_level)114 inittree(R_xlen_t n, int k, int k2,
115 	 const double *data,
116 	 // --> initialize these three vectors:
117 	 double *window, int *outlist, int *nrlist,
118 	 int print_level)
119 {
120     // use 1-indexing for our 3 arrays {window, nrlist, outlist}
121     for(int i=1; i <= k; i++) {
122 	window[i] = data[i-1];
123 	nrlist[i] = (outlist[i] = i);
124     }
125     // MM: not all  2k+1  entries of nrlist[] are used, it seems
126     if(print_level >= 1) {
127 	int    n0 = -12345; // to be recognized easily ...
128 	double w0 = -80.08; //  (ditto)
129 	nrlist[0] = n0;
130 	window[0] = w0;
131 	for(int j=k+1; j <= 2*k; j++) {
132 	    nrlist[j] = n0;
133 	    window[j] = w0;
134 	}
135     }
136 
137     // sort window[1:k] = data["1:k"]   [*only* called here] :
138     R_heapsort(1, k, window, outlist, nrlist, print_level);
139 
140     double big = fabs(window[k]);
141     if (big < fabs(window[1]))
142 	big = fabs(window[1]);
143     // now   big := max |X[1..k]|  or +BIG  if(first_NA <= k), i.e., data had NA|NaN
144     for(int i=k; i > 0; i--) {
145 	window[i+k2] = window[i];
146 	nrlist[i+k2] = nrlist[i]-1;
147     }
148     // outlist[0:(k-1)] :=  (shift down by 1  and offset by k2)
149     for(int i=0; i < k; i++)
150 	outlist[i] = outlist[i+1] + k2;
151 
152     // maybe increase 'big'
153     for(R_xlen_t i=k; i < n; i++)
154 	if (big < fabs(data[i]))
155 	    big = fabs(data[i]);
156 
157     /* big == max(|data_i|,  i = 1,..,n) */
158     big = 1 + 2. * big;/* such that -big < data[] < +big
159 			  (strictly, only if no +/-Inf in data! */
160     int k2p1 = k2+1;
161     // window[0] := ..
162     for(int i=0; i < k2p1; i++) {
163 	window[i]	 = -big;
164 	window[k+k2p1+i] =  big;
165     }
166 
167 /* For Printing `diagnostics' : */
168 #define Rm_PR(_F_,_A_, _k_)	for(int j = 0; j <= _k_; j++) Rprintf(_F_, _A_)
169 #define RgPRINT_j(A_J, _k_)	Rm_PR("% 6.4g", A_J, _k_); Rprintf("\n")
170 #define RdPRINT_j(A_J, _k_)	Rm_PR("% 6d"  , A_J, _k_); Rprintf("\n")
171 //--- smart printing of "Big" / "2B"  [BIG_dbl := .... -> ./Srunmed.c ]
172 #define RwwPRINTj(A_J, _k_)					\
173     for(int j = 0; j <= _k_; j++) {				\
174 	if     (A_J ==  BIG_dbl  ) Rprintf("%6s", "+BIG");	\
175 	else if(A_J == -BIG_dbl  ) Rprintf("%6s", "-BIG");	\
176 	else if(A_J ==  2*BIG_dbl) Rprintf("%6s", "+2B");	\
177 	else if(A_J == -2*BIG_dbl) Rprintf("%6s", "-2B");	\
178 	else                       Rprintf("% 6.4g", A_J);	\
179     }								\
180     Rprintf("\n")
181 
182 #define R_PRINT_4vec()						\
183     Rprintf(" %9s: ","j");         RdPRINT_j(        j, 2*k);	\
184     Rprintf(" %9s: ","window []"); RwwPRINTj(window[j], 2*k);	\
185     Rprintf(" %9s: "," nrlist[]"); RdPRINT_j(nrlist[j], 2*k);	\
186     Rprintf(" %9s: ","outlist[]"); RdPRINT_j(outlist[j], k )
187 
188     /* window[], outlist[], and nrlist[] are all 1-based (indices) */
189     if(print_level) {
190 	R_PRINT_4vec();
191     }
192 
193 } /* inittree*/
194 
195 //------------------------ 2. runmedint() and auxiliaries -------------------
196 static void
toroot(int outvirt,int k,R_xlen_t nrnew,int outnext,const double * data,double * window,int * outlist,int * nrlist,int print_level)197 toroot(int outvirt, int k, R_xlen_t nrnew, int outnext,
198        const double *data, double *window, int *outlist, int *nrlist,
199        int print_level)
200 {
201     if(print_level >= 2)
202 	Rprintf("  toroot(%d,%d, nn=%d, outn=%d) ", outvirt, k, (int) nrnew, outnext);
203     int father;
204     do {
205 	father                    = outvirt/2;
206 	window[outvirt+k]	  = window[father+k];
207 	outlist[nrlist[father+k]] = outvirt+k;
208 	if(print_level >= 3)
209 	    Rprintf(" nrl[%d] := nrl[%d] = %d;", outvirt+k, father+k, nrlist[father+k]);
210 	nrlist[outvirt+k]	  = nrlist[father+k];
211 	outvirt			  = father;
212     } while (father != 0);
213     if(print_level >= 2) Rprintf("\n  ");
214     window[k]	     = data[nrnew];
215     outlist[outnext] = k;
216     nrlist[k]	     = outnext;
217 }
218 
219 static void
downtoleave(int outvirt,int k,double * window,int * outlist,int * nrlist,int print_level)220 downtoleave(int outvirt, int k,
221 	    double *window, int *outlist, int *nrlist, int print_level)
222 {
223     if(print_level >= 2) Rprintf(" downtoleave(%d, %d)  ", outvirt,k);
224     for(;;) {
225 	int childl = outvirt*2,
226 	    childr = childl -1;
227 	if (window[childl+k] < window[childr+k])
228 	    childl = childr;
229 	if (window[outvirt+k] >= window[childl+k])
230 	    break;
231 	/* seg.fault may happen here: invalid outvirt+k/ childl+k ? */
232 	swap(outvirt+k, childl+k, window, outlist, nrlist, print_level);
233 	outvirt = childl;
234     }
235     if(print_level >= 2) Rprintf("\n ");
236 }
237 
238 static void
uptoleave(int outvirt,int k,double * window,int * outlist,int * nrlist,int print_level)239 uptoleave(int outvirt, int k,
240 	  double *window, int *outlist, int *nrlist, int print_level)
241 {
242     if(print_level >= 2) Rprintf(" uptoleave(%d, %d)  ", outvirt,k);
243     for(;;) {
244 	int childl = outvirt*2,
245 	    childr = childl +1;
246 	if (window[childl+k] > window[childr+k])
247 	    childl = childr;
248 	if (window[outvirt+k] <= window[childl+k])
249 	    break;
250 	/* seg.fault may happen here: invalid outvirt+k/ childl+k ? */
251 	swap(outvirt+k, childl+k, window, outlist, nrlist, print_level);
252 	outvirt = childl;
253     }
254     if(print_level >= 2) Rprintf("\n ");
255 }
256 
257 static void
upperoutupperin(int outvirt,int k,double * window,int * outlist,int * nrlist,int print_level)258 upperoutupperin(int outvirt, int k,
259 		double *window, int *outlist, int *nrlist, int print_level)
260 {
261     if(print_level >= 2) Rprintf("UpperoutUPPERin(%d, %d)\n  ", outvirt,k);
262     uptoleave(outvirt, k, window, outlist, nrlist, print_level);
263     int father = outvirt/2;
264     while (window[outvirt+k] < window[father+k]) {
265 	if(print_level >= 2) Rprintf(" UpperoutUP(win[%d]):  ", outvirt+k);
266 	swap(outvirt+k, father+k, window, outlist, nrlist, print_level);
267 	outvirt = father;
268 	father	= outvirt/2;
269     }
270     if(print_level >= 2) Rprintf("\n");
271 }
272 
273 static void
upperoutdownin(int outvirt,int k,R_xlen_t nrnew,int outnext,const double * data,double * window,int * outlist,int * nrlist,int print_level)274 upperoutdownin(int outvirt, int k, R_xlen_t nrnew, int outnext,
275 	       const double *data, double *window, int *outlist, int *nrlist,
276 	       int print_level)
277 {
278     if(print_level >= 2) Rprintf("__upperoutDOWNin(%d, %d, ..)\n  ", outvirt,k);
279     toroot(outvirt, k, nrnew, outnext, data, window, outlist, nrlist, print_level);
280     if(window[k] < window[k-1]) {
281 	if(print_level >= 2) Rprintf(" upperoutDN(win[%d]):  ", k);
282 	swap(k, k-1, window, outlist, nrlist, print_level);
283 	downtoleave(/*outvirt = */ -1, k, window, outlist, nrlist, print_level);
284     }
285 }
286 
287 static void
downoutdownin(int outvirt,int k,double * window,int * outlist,int * nrlist,int print_level)288 downoutdownin(int outvirt, int k,
289 	      double *window, int *outlist, int *nrlist, int print_level)
290 {
291     if(print_level >= 2) Rprintf("DownoutDOWNin(%d, %d)\n  ", outvirt,k);
292     downtoleave(outvirt, k, window, outlist, nrlist, print_level);
293     int father = outvirt/2;
294     while (window[outvirt+k] > window[father+k]) {
295 	swap(outvirt+k, father+k, window, outlist, nrlist, print_level);
296 	outvirt = father;
297 	father	= outvirt/2;
298     }
299     // if(print_level >= 2) Rprintf("\n");
300 }
301 
302 static void
downoutupperin(int outvirt,int k,R_xlen_t nrnew,int outnext,const double * data,double * window,int * outlist,int * nrlist,int print_level)303 downoutupperin(int outvirt, int k, R_xlen_t nrnew, int outnext,
304 	       const double *data, double *window, int *outlist, int *nrlist,
305 	       int print_level)
306 {
307     if(print_level >= 2) Rprintf("__downoutUPPERin(%d, %d, ..)\n  ", outvirt,k);
308     toroot(outvirt, k, nrnew, outnext, data, window, outlist, nrlist, print_level);
309     if(window[k] > window[k+1]) {
310 	swap(k, k+1, window, outlist, nrlist, print_level);
311 	uptoleave(/*outvirt = */ +1, k, window, outlist, nrlist, print_level);
312     }
313 }
314 
315 static void
wentoutone(int k,double * window,int * outlist,int * nrlist,int print_level)316 wentoutone(int k, double *window, int *outlist, int *nrlist, int print_level)
317 {
318     if(print_level >= 2) Rprintf(" wentOUT_1(%d)\n  ", k);
319     swap(k, k+1, window, outlist, nrlist, print_level);
320     uptoleave(/*outvirt = */ +1, k, window, outlist, nrlist, print_level);
321 }
322 
323 static void
wentouttwo(int k,double * window,int * outlist,int * nrlist,int print_level)324 wentouttwo(int k, double *window, int *outlist, int *nrlist, int print_level)
325 {
326     if(print_level >= 2) Rprintf(" wentOUT_2(%d)\n  ", k);
327     swap(k, k-1, window, outlist, nrlist, print_level);
328     downtoleave(/*outvirt = */ -1, k, window, outlist, nrlist, print_level);
329 }
330 
331 
332 static void
runmedint(R_xlen_t n,int k,int k2,const double * data,double * median,double * window,int * outlist,int * nrlist,int end_rule,int print_level)333 runmedint(R_xlen_t n, int k, int k2, const double *data, double *median,
334 	  double *window, int *outlist, int *nrlist,
335 	  int end_rule, int print_level)
336 {
337     /* Running Median of `k' ,  k == 2*k2 + 1 *
338      * end_rule == 0: leave values at the end,
339      *          otherwise: "constant" end values
340      */
341     int outnext, out, outvirt;
342 
343     if(end_rule)
344 	for(int i = 0; i <= k2; median[i++] = window[k]);
345     else {
346 	for(int i = 0; i <  k2; median[i] = data[i], i++);
347 	median[k2] = window[k];
348     }
349     R_xlen_t every_i;
350     if(print_level >= 2)
351 	every_i = (n > 100) ? n/10 : 10;
352 
353     outnext = 0;
354     for(R_xlen_t i = k2+1; i < n-k2; i++) {/* compute (0-index) median[i] == X*_{i+1} */
355 	out  = outlist[outnext];
356 	R_xlen_t nrnew = i+k2;
357 	window[out] = data[nrnew];
358 	outvirt	= out-k;
359 	if (out > k)
360 	    if(data[nrnew] >= window[k])
361 		upperoutupperin(outvirt, k, window, outlist, nrlist, print_level);
362 	    else
363 		upperoutdownin(outvirt, k, nrnew, outnext,
364 			       data, window, outlist, nrlist, print_level);
365 	else if(out < k)
366 	    if(data[nrnew] < window[k])
367 		downoutdownin(outvirt, k, window, outlist, nrlist, print_level);
368 	    else
369 		downoutupperin(outvirt, k, nrnew, outnext,
370 			       data, window, outlist, nrlist, print_level);
371 	else if(window[k] > window[k+1])
372 	    wentoutone(k, window, outlist, nrlist, print_level);
373 	else if(window[k] < window[k-1])
374 	    wentouttwo(k, window, outlist, nrlist, print_level);
375 	median[i] = window[k];
376 	outnext	  = (outnext+1) % k;
377 	if(print_level >= 2) {
378 	    Rprintf("i=%2d (out=%2d, *virt=%2d): med[i] := window[k]=%11g, outnext=%3d\n",
379 		    i, out, outvirt, median[i], outnext);
380 	    if(print_level >= 3 || (i % every_i) == 0) {
381 		R_PRINT_4vec();
382 	    }
383 	}
384     }
385     if(print_level >= 2) Rprintf("\n");
386 
387     if(end_rule)
388 	for(R_xlen_t i = n-k2; i < n; median[i++] = window[k]);
389     else
390 	for(R_xlen_t i = n-k2; i < n; median[i] = data[i], i++);
391 
392 }/* runmedint() */
393 
394 //------------------------ 3. Trunmed() --------------------------------------
395 
396 // Main function called from runmed() in ./Srunmed.c :
Trunmed(const double * x,double * median,R_xlen_t n,int k,int end_rule,int print_level)397 static void Trunmed(const double *x,// (n)
398 		    double *median, // (n)
399 		    R_xlen_t n,/* = length(x) */
400 		    int k, /* is odd <= n */
401 		    int end_rule, int print_level)
402 {
403     int k2 = (k - 1)/2; // always odd  k == 2 * k2 + 1
404     double *window = (double *) R_alloc(2*k + 1, sizeof(double));
405     int    *nrlist = (int *)    R_alloc(2*k + 1, sizeof(int)),
406 	  *outlist = (int *)    R_alloc(  k + 1, sizeof(int));
407 
408     inittree(n, k, k2, x,
409 	     /* initialize the 3 vectors: */ window, outlist, nrlist,
410 	     print_level);
411 
412     runmedint(n, k, k2, x, median, window, outlist, nrlist,
413 	      end_rule, print_level);
414 
415 }
416