1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 1997-2017   The R Core Team.
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, a copy is available at
17  *  https://www.R-project.org/Licenses/
18  */
19 
20 #include <R_ext/Utils.h>	/* R_rsort() */
21 #include <math.h>
22 
23 #include <Rinternals.h>
24 #include "statsR.h"
25 
26 #ifdef DEBUG_tukeyline
27 # include <R_ext/Print.h>
28 #endif
29 
30 /* Speed up by `inlining' these (as macros) [since R version 1.2] : */
31 #if 1
32 #define il(n,x)	(int)floor((n - 1) * x)
33 #define iu(n,x)	(int) ceil((n - 1) * x)
34 
35 #else
il(int n,double x)36 static int il(int n, double x)
37 {
38     return (int)floor((n - 1) * x) ;
39 }
40 
iu(int n,double x)41 static int iu(int n, double x)
42 {
43     return (int)ceil((n - 1) * x);
44 }
45 #endif
46 
line(double * x,double * y,double * z,double * w,int n,int iter,double coef[2])47 static void line(double *x, double *y, /* input (x[i],y[i])s */
48 		 double *z, double *w, /* work and output: resid. & fitted */
49 		 /* all the above of length */ int n,
50 		 int iter,
51 		 double coef[2])
52 {
53     int i, j, k;
54     double xb, x1, x2, xt, yt, yb, tmp1, tmp2;
55 
56     for(i = 0 ; i < n ; i++) {
57 	z[i] = x[i];
58 	w[i] = y[i];
59     }
60     R_rsort(z, n);/* z = ordered abscissae */
61 
62     // All x-space computations can (and hence should) happen outside the iterations
63 
64     /* NB: Using quantiles is *not* quite correct (not following the reference).
65            Rather need 3 groups with group sizes depending on n and (n % 3)
66     */
67 
68     // x1 := quantile(x, 1/3) :
69     tmp1 = z[il(n, 1./3.)];
70     tmp2 = z[iu(n, 1./3.)];	x1 = 0.5*(tmp1+tmp2);
71 
72     // x1 := quantile(x, 2/3) :
73     tmp1 = z[il(n, 2./3.)];
74     tmp2 = z[iu(n, 2./3.)];	x2 = 0.5*(tmp1+tmp2);
75 
76     // xb := x_L := Median(x[i]; x[i] <= quantile(x, 1/3))
77     for(i = 0, k = 0; i < n; i++)
78 	if(x[i] <= x1)
79 	    z[k++] = x[i];
80     R_rsort(z, k); xb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
81 
82     // xt := x_R := Median(x[i]; x[i] >= quantile(x, 2/3))
83     for(i = 0, k = 0; i < n; i++)
84 	if(x[i] >= x2)
85 	    z[k++] = x[i];
86     R_rsort(z, k); xt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
87 
88 
89 #ifdef DEBUG_tukeyline
90     REprintf("tukey line(*, n = %d): xL=xb=%g, x_1=%g, x_2=%g, xR=xt=%g\n",
91 	     n, xb, x1, x2, xt);
92 #endif
93 
94     double slope = 0.;
95     // "Polishing" iterations (incl first estimate) :
96     for(j = 1; j <= iter; j++) { // w[] = "current y"
97 
98 	/* yb := Median(y[i]; x[i] <= quantile(x, 1/3) */
99 	for(i = 0, k = 0; i < n; i++)
100 	    if(x[i] <= x1)
101 		z[k++] = w[i];
102 	R_rsort(z, k);
103 	yb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
104 #ifdef DEBUG_tukeyline
105 	REprintf("   Left 3rd: k=%d => (j1,j2)=(%d,%d) =>  yL=yb=%g\n",
106 		 k, il(k, 0.5), iu(k, 0.5), yb);
107 #endif
108 
109 	/* yt := Median(y[i]; x[i] >= quantile(x, 2/3) */
110 	for(i = 0, k = 0; i < n; i++)
111 	    if(x[i] >= x2)
112 		z[k++] = w[i];
113 	R_rsort(z,k);
114 	yt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
115 #ifdef DEBUG_tukeyline
116 	REprintf("  Right 3rd: k=%d => (j1,j2)=(%d,%d) =>  yR=yt=%g\n",
117 		 k, il(k, 0.5), iu(k, 0.5), yt);
118 #endif
119 
120 	slope += (yt - yb)/(xt - xb);
121 	for(i = 0 ; i < n ; i++)
122 	    w[i] = y[i] - slope*x[i];
123     } // iterations
124 
125     // intercept := median{ "residuals" } (= mean of the 3 median residuals)
126     R_rsort(w,n);
127     double yint = 0.5 * (w[il(n, 0.5)] + w[iu(n, 0.5)]);
128 
129     for( i = 0 ; i < n ; i++ ) {
130 	w[i] = yint + slope*x[i];
131 	z[i] = y[i] - w[i];
132     }
133     coef[0] = yint;
134     coef[1] = slope;
135 }
136 
137 // where is this used?
tukeyline0(double * x,double * y,double * z,double * w,int * n,double * coef)138 void tukeyline0(double *x, double *y, double *z, double *w, int *n,
139 		double *coef)
140 {
141     line(x, y, z, w, *n, 1, coef);
142 }
143 
tukeyline(SEXP x,SEXP y,SEXP iter,SEXP call)144 SEXP tukeyline(SEXP x, SEXP y, SEXP iter, SEXP call)
145 {
146     int n = LENGTH(x);
147     if (n < 2) error("insufficient observations");
148     SEXP ans;
149     ans = PROTECT(allocVector(VECSXP, 4));
150     SEXP nm = allocVector(STRSXP, 4);
151     setAttrib(ans, R_NamesSymbol, nm);
152     SET_STRING_ELT(nm, 0, mkChar("call"));
153     SET_STRING_ELT(nm, 1, mkChar("coefficients"));
154     SET_STRING_ELT(nm, 2, mkChar("residuals"));
155     SET_STRING_ELT(nm, 3, mkChar("fitted.values"));
156     SET_VECTOR_ELT(ans, 0, call);
157     SEXP coef = allocVector(REALSXP, 2);
158     SET_VECTOR_ELT(ans, 1, coef);
159     SEXP res = allocVector(REALSXP, n);
160     SET_VECTOR_ELT(ans, 2, res);
161     SEXP fit = allocVector(REALSXP, n);
162     SET_VECTOR_ELT(ans, 3, fit);
163     line(REAL(x), REAL(y), REAL(res), REAL(fit), n, asInteger(iter), REAL(coef));
164     UNPROTECT(1);
165     return ans;
166 }
167