1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 1997-2017 The R Core Team.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, a copy is available at
17 * https://www.R-project.org/Licenses/
18 */
19
20 #include <R_ext/Utils.h> /* R_rsort() */
21 #include <math.h>
22
23 #include <Rinternals.h>
24 #include "statsR.h"
25
26 #ifdef DEBUG_tukeyline
27 # include <R_ext/Print.h>
28 #endif
29
30 /* Speed up by `inlining' these (as macros) [since R version 1.2] : */
31 #if 1
32 #define il(n,x) (int)floor((n - 1) * x)
33 #define iu(n,x) (int) ceil((n - 1) * x)
34
35 #else
il(int n,double x)36 static int il(int n, double x)
37 {
38 return (int)floor((n - 1) * x) ;
39 }
40
iu(int n,double x)41 static int iu(int n, double x)
42 {
43 return (int)ceil((n - 1) * x);
44 }
45 #endif
46
line(double * x,double * y,double * z,double * w,int n,int iter,double coef[2])47 static void line(double *x, double *y, /* input (x[i],y[i])s */
48 double *z, double *w, /* work and output: resid. & fitted */
49 /* all the above of length */ int n,
50 int iter,
51 double coef[2])
52 {
53 int i, j, k;
54 double xb, x1, x2, xt, yt, yb, tmp1, tmp2;
55
56 for(i = 0 ; i < n ; i++) {
57 z[i] = x[i];
58 w[i] = y[i];
59 }
60 R_rsort(z, n);/* z = ordered abscissae */
61
62 // All x-space computations can (and hence should) happen outside the iterations
63
64 /* NB: Using quantiles is *not* quite correct (not following the reference).
65 Rather need 3 groups with group sizes depending on n and (n % 3)
66 */
67
68 // x1 := quantile(x, 1/3) :
69 tmp1 = z[il(n, 1./3.)];
70 tmp2 = z[iu(n, 1./3.)]; x1 = 0.5*(tmp1+tmp2);
71
72 // x1 := quantile(x, 2/3) :
73 tmp1 = z[il(n, 2./3.)];
74 tmp2 = z[iu(n, 2./3.)]; x2 = 0.5*(tmp1+tmp2);
75
76 // xb := x_L := Median(x[i]; x[i] <= quantile(x, 1/3))
77 for(i = 0, k = 0; i < n; i++)
78 if(x[i] <= x1)
79 z[k++] = x[i];
80 R_rsort(z, k); xb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
81
82 // xt := x_R := Median(x[i]; x[i] >= quantile(x, 2/3))
83 for(i = 0, k = 0; i < n; i++)
84 if(x[i] >= x2)
85 z[k++] = x[i];
86 R_rsort(z, k); xt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
87
88
89 #ifdef DEBUG_tukeyline
90 REprintf("tukey line(*, n = %d): xL=xb=%g, x_1=%g, x_2=%g, xR=xt=%g\n",
91 n, xb, x1, x2, xt);
92 #endif
93
94 double slope = 0.;
95 // "Polishing" iterations (incl first estimate) :
96 for(j = 1; j <= iter; j++) { // w[] = "current y"
97
98 /* yb := Median(y[i]; x[i] <= quantile(x, 1/3) */
99 for(i = 0, k = 0; i < n; i++)
100 if(x[i] <= x1)
101 z[k++] = w[i];
102 R_rsort(z, k);
103 yb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
104 #ifdef DEBUG_tukeyline
105 REprintf(" Left 3rd: k=%d => (j1,j2)=(%d,%d) => yL=yb=%g\n",
106 k, il(k, 0.5), iu(k, 0.5), yb);
107 #endif
108
109 /* yt := Median(y[i]; x[i] >= quantile(x, 2/3) */
110 for(i = 0, k = 0; i < n; i++)
111 if(x[i] >= x2)
112 z[k++] = w[i];
113 R_rsort(z,k);
114 yt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
115 #ifdef DEBUG_tukeyline
116 REprintf(" Right 3rd: k=%d => (j1,j2)=(%d,%d) => yR=yt=%g\n",
117 k, il(k, 0.5), iu(k, 0.5), yt);
118 #endif
119
120 slope += (yt - yb)/(xt - xb);
121 for(i = 0 ; i < n ; i++)
122 w[i] = y[i] - slope*x[i];
123 } // iterations
124
125 // intercept := median{ "residuals" } (= mean of the 3 median residuals)
126 R_rsort(w,n);
127 double yint = 0.5 * (w[il(n, 0.5)] + w[iu(n, 0.5)]);
128
129 for( i = 0 ; i < n ; i++ ) {
130 w[i] = yint + slope*x[i];
131 z[i] = y[i] - w[i];
132 }
133 coef[0] = yint;
134 coef[1] = slope;
135 }
136
137 // where is this used?
tukeyline0(double * x,double * y,double * z,double * w,int * n,double * coef)138 void tukeyline0(double *x, double *y, double *z, double *w, int *n,
139 double *coef)
140 {
141 line(x, y, z, w, *n, 1, coef);
142 }
143
tukeyline(SEXP x,SEXP y,SEXP iter,SEXP call)144 SEXP tukeyline(SEXP x, SEXP y, SEXP iter, SEXP call)
145 {
146 int n = LENGTH(x);
147 if (n < 2) error("insufficient observations");
148 SEXP ans;
149 ans = PROTECT(allocVector(VECSXP, 4));
150 SEXP nm = allocVector(STRSXP, 4);
151 setAttrib(ans, R_NamesSymbol, nm);
152 SET_STRING_ELT(nm, 0, mkChar("call"));
153 SET_STRING_ELT(nm, 1, mkChar("coefficients"));
154 SET_STRING_ELT(nm, 2, mkChar("residuals"));
155 SET_STRING_ELT(nm, 3, mkChar("fitted.values"));
156 SET_VECTOR_ELT(ans, 0, call);
157 SEXP coef = allocVector(REALSXP, 2);
158 SET_VECTOR_ELT(ans, 1, coef);
159 SEXP res = allocVector(REALSXP, n);
160 SET_VECTOR_ELT(ans, 2, res);
161 SEXP fit = allocVector(REALSXP, n);
162 SET_VECTOR_ELT(ans, 3, fit);
163 line(REAL(x), REAL(y), REAL(res), REAL(fit), n, asInteger(iter), REAL(coef));
164 UNPROTECT(1);
165 return ans;
166 }
167