1## Copyright (C) 2012-2015 Nir Krakauer 2## 3## This program is free software; you can redistribute it and/or modify 4## it under the terms of the GNU General Public License as published by 5## the Free Software Foundation; either version 2 of the License, or 6## (at your option) any later version. 7## 8## This program is distributed in the hope that it will be useful, 9## but WITHOUT ANY WARRANTY; without even the implied warranty of 10## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11## GNU General Public License for more details. 12## 13## You should have received a copy of the GNU General Public License 14## along with this program; If not, see <http://www.gnu.org/licenses/>. 15 16## -*- texinfo -*- 17## @deftypefn{Function File}{[@var{yi} @var{p} @var{sigma2} @var{unc_yi} @var{df}] =} csaps(@var{x}, @var{y}, @var{p}, @var{xi}, @var{w}=[]) 18## @deftypefnx{Function File}{[@var{pp} @var{p} @var{sigma2}] =} csaps(@var{x}, @var{y}, @var{p}, [], @var{w}=[]) 19## 20## Cubic spline approximation (smoothing)@* 21## approximate [@var{x},@var{y}], weighted by @var{w} (inverse variance of the @var{y} values; if not given, equal weighting is assumed), at @var{xi} 22## 23## The chosen cubic spline with natural boundary conditions @var{pp}(@var{x}) minimizes @var{p} * Sum_i @var{w}_i*(@var{y}_i - @var{pp}(@var{x}_i))^2 + (1-@var{p}) * Int @var{pp}''(@var{x}) d@var{x} 24## 25## Outside the range of @var{x}, the cubic spline is a straight line 26## 27## @var{x} and @var{w} should be n by 1 in size; @var{y} should be n by m; @var{xi} should be k by 1; the values in @var{x} should be distinct and in ascending order; the values in @var{w} should be nonzero 28## 29## @table @asis 30## @item @var{p}=0 31## maximum smoothing: straight line 32## @item @var{p}=1 33## no smoothing: interpolation 34## @item @var{p}<0 or not given 35## an intermediate amount of smoothing is chosen @* 36## and the corresponding @var{p} between 0 and 1 is returned @* 37## (such that the smoothing term and the interpolation term are of the same magnitude) @* 38## (csaps_sel provides other methods for automatically selecting the smoothing parameter @var{p}.) 39## @end table 40## 41## @var{sigma2} is an estimate of the data error variance, assuming the smoothing parameter @var{p} is realistic 42## 43## @var{unc_yi} is an estimate of the standard error of the fitted curve(s) at the @var{xi}. 44## Empty if @var{xi} is not provided. 45## 46## @var{df} is an estimate of the degrees of freedom used in the spline fit (2 for @var{p}=0, n for @var{p}=1) 47## 48## 49## References: @* 50## Carl de Boor (1978), A Practical Guide to Splines, Springer, Chapter XIV @* 51## Grace Wahba (1983), Bayesian ``confidence intervals'' for the cross-validated smoothing spline, Journal of the Royal Statistical Society, 45B(1):133-150 52## 53## @end deftypefn 54## @seealso{spline, splinefit, csapi, ppval, dedup, bin_values, csaps_sel} 55 56## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu> 57 58function [ret,p,sigma2,unc_yi,df]=csaps(x,y,p,xi,w) 59 60warning ("off", "Octave:broadcast", "local"); 61 62 if(nargin < 5) 63 w = []; 64 if(nargin < 4) 65 xi = []; 66 if(nargin < 3) 67 p = []; 68 endif 69 endif 70 endif 71 72 if(columns(x) > 1) 73 x = x'; 74 y = y'; 75 w = w'; 76 endif 77 78 if any (isnan ([x y w](:)) ) 79 error('NaN values in inputs; pre-process to remove them') 80 endif 81 82 h = diff(x); 83 if !all(h > 0) && !all(h < 0) 84 error('x must be strictly monotone; pre-process to achieve this') 85 endif 86 87 88 [n m] = size(y); #should also be that n = numel(x); 89 90 if isempty(w) 91 w = ones(n, 1); 92 end 93 94 95 R = spdiags([h(2:end) 2*(h(1:end-1) + h(2:end)) h(1:end-1)], [-1 0 1], n-2, n-2); 96 97 QT = spdiags([1 ./ h(1:end-1) -(1 ./ h(1:end-1) + 1 ./ h(2:end)) 1 ./ h(2:end)], [0 1 2], n-2, n); 98 99## if not given, choose p so that trace(6*(1-p)*QT*diag(1 ./ w)*QT') = trace(p*R) 100 if isempty(p) || (p < 0) 101 r = full(6*trace(QT*diag(1 ./ w)*QT') / trace(R)); 102 p = r ./ (1 + r); 103 endif 104 105## solve for the scaled second derivatives u and for the function values a at the knots 106## (if p = 1, a = y; if p = 0, cc(:) = dd(:) = 0) 107## QT*y can also be written as (y(3:n, :) - y(2:(n-1), :)) ./ h(2:end) - (y(2:(n-1), :) - y(1:(n-2), :)) ./ h(1:(end-1)) 108 u = (6*(1-p)*QT*diag(1 ./ w)*QT' + p*R) \ (QT*y); 109 a = y - 6*(1-p)*diag(1 ./ w)*QT'*u; 110 111## derivatives for the piecewise cubic spline 112 aa = bb = cc = dd = zeros (n+1, m); 113 aa(2:end, :) = a; 114 cc(3:n, :) = 6*p*u; #second derivative at endpoints is 0 [natural spline] 115 dd(2:n, :) = diff(cc(2:(n+1), :)) ./ h; 116 bb(2:n, :) = diff(a) ./ h - (h/3) .* (cc(2:n, :) + cc(3:(n+1), :)/2); 117 118## add knots to either end of spline pp-form to ensure linear extrapolation 119 dx_minus = eps(x(1)); 120 dx_plus = eps(x(end)); 121 xminus = x(1) - dx_minus; 122 xplus = x(end) + dx_plus; 123 x = [xminus; x; xplus]; 124 slope_minus = bb(2, :); 125 slope_plus = bb(n, :) + cc(n, :)*h(n-1) + (dd(n, :)/2)*h(n-1)^2; 126 bb(1, :) = slope_minus; #linear extension of splines 127 bb(n + 1, :) = slope_plus; 128 aa(1, :) = a(1, :) - dx_minus*bb(1, :); 129 130 ret = mkpp (x, cat (2, dd'(:)/6, cc'(:)/2, bb'(:), aa'(:)), m); 131 clear a aa bb cc dd slope_minus slope_plus u #these values are no longer needed 132 133 if ~isempty (xi) 134 ret = ppval (ret, xi); 135 endif 136 137 if (isargout (4) && isempty (xi)) 138 unc_yi = []; 139 endif 140 141 if isargout (3) || (isargout (4) && ~isempty (xi)) || isargout (5) 142 143 if p == 1 #interpolation assumes no error in the given data 144 sigma2 = 0; 145 if isargout (4) && ~isempty (xi) 146 unc_yi = zeros(numel(xi), 1); 147 endif 148 df = n; 149 return 150 endif 151 152 [U D V] = svd (diag(1 ./ sqrt(w))*QT'*sqrtm(inv(R)), 0); D = diag(D).^2; 153 #influence matrix for given p 154 A = speye(n) - U * diag(D ./ (D + (p / (6*(1-p))))) * U'; 155 A = diag (1 ./ sqrt(w)) * A * diag(sqrt(w)); #rescale to original units; a = A*y 156 MSR = mean (w .* (y - (A*y)) .^ 2); #mean square residual 157 df = trace (A); 158 sigma2 = mean (MSR(:)) * (n / (n-df)); #estimated data error variance (wahba83) 159 160 if isargout (4) && ~isempty (xi) 161 ni = numel (xi); 162 #dependence of spline values on each given point (to compute uncertainty) 163 C = 6 * p * full ((6*(1-p)*QT*diag(1 ./ w)*QT' + p*R) \ QT); #cc(3:n, :) = C*y [sparsity is lost] 164 D = diff ([zeros(n, 1) C' zeros(n, 1)]') ./ h; #dd(2:n, :) = D*y 165 B = diff (A) ./ h - (h/3) .* ([zeros(n, 1) C']' + [C' zeros(n, 1)]' / 2); #bb(2:n, :) = B*y 166 #add end-points 167 C = [zeros(n, 2) C' zeros(n, 1)]'; 168 D = [zeros(n, 1) D' zeros(n, 1)]'; 169 B = [B(1, :)' B' B(end, :)' + C(n, :)'*h(n-1) + (D(n, :)'/2)*h(n-1)^2]'; 170 A = [A(1, :)'-eps(x(1))*B(1, :)' A']'; 171 #sum the squared dependence on each data value y at each requested point xi 172 unc_yi = zeros (ni, 1); 173 for i = 1:n 174 unc_yi += (ppval (mkpp (x, cat (2, D(:, i)/6, C(:, i)/2, B(:, i), A(:, i))), xi(:))) .^ 2; 175 endfor 176 unc_yi = sqrt (sigma2 * unc_yi); #not exactly the same as unc_y as calculated in csaps_sel even if xi = x, but fairly close 177 endif 178 endif 179 180 181endfunction 182 183%!shared x,y,xi,yi,p,sigma2,unc_yi,df 184%! x = ([1:10 10.5 11.3])'; y = sin(x); xi = linspace(min(x), max(x), 30)'; 185%!assert (csaps(x,y,1,x), y, 10*eps); 186%!assert (csaps(x,y,1,x'), y', 10*eps); 187%!assert (csaps(x',y',1,x'), y', 10*eps); 188%!assert (csaps(x',y',1,x), y, 10*eps); 189%!assert (csaps(x,[y 2*y],1,x)', [y 2*y], 10*eps); 190%! [yi,p,sigma2,unc_yi,df] = csaps(x,y,1,xi); 191%!assert (yi, ppval(csape(x, y, "variational"), xi), eps); 192%!assert (p, 1); 193%!assert (unc_yi, zeros(size(xi))); 194%!assert (sigma2, 0); 195%!assert (df, numel(x)); 196%! [yi,p,~,~,df] = csaps(x,y,0,xi); 197%!assert (yi, polyval(polyfit(x, y, 1), xi), 10*eps); 198%!assert (p, 0); 199%!assert (df, 2, 100*eps); 200 201%{ 202test weighted smoothing: 203n = 500; 204a = 0; b = pi; 205f = @(x) sin(x); 206x = a + (b-a)*sort(rand(n, 1)); 207w = rand(n, 1); 208y = f(x) + randn(n, 1) ./ sqrt(w); 209xi = linspace(a, b, n)'; 210yi_target = f(xi); 211[~,p_sel] = csaps_sel(x, y, xi, w, 1); 212[yi,~,sigma2,unc_yi] = csaps(x,y,p_sel,xi,w); 213rmse = rms((yi - yi_target)); 214rmse_weighted = rms((yi - yi_target) ./ unc_yi); 215#worse results without the (correct) weighting: 216[~,p_sel] = csaps_sel(x, y, xi, []); 217[yi,~,sigma2,unc_yi] = csaps(x,y,p_sel,xi,[]); 218rmse_u = rms((yi - yi_target)); 219rmse_u_weighted = rms((yi - yi_target) ./ unc_yi); 220%} 221