1## Copyright (C) 2012-2015 Nir Krakauer
2##
3## This program is free software; you can redistribute it and/or modify
4## it under the terms of the GNU General Public License as published by
5## the Free Software Foundation; either version 2 of the License, or
6## (at your option) any later version.
7##
8## This program is distributed in the hope that it will be useful,
9## but WITHOUT ANY WARRANTY; without even the implied warranty of
10## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11## GNU General Public License for more details.
12##
13## You should have received a copy of the GNU General Public License
14## along with this program; If not, see <http://www.gnu.org/licenses/>.
15
16## -*- texinfo -*-
17## @deftypefn{Function File}{[@var{yi} @var{p} @var{sigma2} @var{unc_yi} @var{df}] =} csaps(@var{x}, @var{y}, @var{p}, @var{xi}, @var{w}=[])
18## @deftypefnx{Function File}{[@var{pp} @var{p} @var{sigma2}] =} csaps(@var{x}, @var{y}, @var{p}, [], @var{w}=[])
19##
20## Cubic spline approximation (smoothing)@*
21## approximate [@var{x},@var{y}], weighted by @var{w} (inverse variance of the @var{y} values; if not given, equal weighting is assumed), at @var{xi}
22##
23## The chosen cubic spline with natural boundary conditions @var{pp}(@var{x}) minimizes @var{p} * Sum_i @var{w}_i*(@var{y}_i - @var{pp}(@var{x}_i))^2  +  (1-@var{p}) * Int @var{pp}''(@var{x}) d@var{x}
24##
25## Outside the range of @var{x}, the cubic spline is a straight line
26##
27## @var{x} and @var{w} should be n by 1 in size; @var{y} should be n by m; @var{xi} should be k by 1; the values in @var{x} should be distinct and in ascending order; the values in @var{w} should be nonzero
28##
29## @table @asis
30## @item @var{p}=0
31##       maximum smoothing: straight line
32## @item @var{p}=1
33##       no smoothing: interpolation
34## @item @var{p}<0 or not given
35##       an intermediate amount of smoothing is chosen @*
36##       and the corresponding @var{p} between 0 and 1 is returned @*
37##       (such that the smoothing term and the interpolation term are of the same magnitude) @*
38##       (csaps_sel provides other methods for automatically selecting the smoothing parameter @var{p}.)
39## @end table
40##
41## @var{sigma2} is an estimate of the data error variance, assuming the smoothing parameter @var{p} is realistic
42##
43## @var{unc_yi} is an estimate of the standard error of the fitted curve(s) at the @var{xi}.
44## Empty if @var{xi} is not provided.
45##
46## @var{df} is an estimate of the degrees of freedom used in the spline fit (2 for @var{p}=0, n for @var{p}=1)
47##
48##
49## References:  @*
50## Carl de Boor (1978), A Practical Guide to Splines, Springer, Chapter XIV  @*
51## Grace Wahba (1983), Bayesian ``confidence intervals'' for the cross-validated smoothing spline, Journal of the Royal Statistical Society, 45B(1):133-150
52##
53## @end deftypefn
54## @seealso{spline, splinefit, csapi, ppval, dedup, bin_values, csaps_sel}
55
56## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu>
57
58function [ret,p,sigma2,unc_yi,df]=csaps(x,y,p,xi,w)
59
60warning ("off", "Octave:broadcast", "local");
61
62  if(nargin < 5)
63    w = [];
64    if(nargin < 4)
65      xi = [];
66      if(nargin < 3)
67	      p = [];
68      endif
69    endif
70  endif
71
72  if(columns(x) > 1)
73    x = x';
74    y = y';
75    w = w';
76  endif
77
78  if any (isnan ([x y w](:)) )
79    error('NaN values in inputs; pre-process to remove them')
80  endif
81
82  h = diff(x);
83  if !all(h > 0) && !all(h < 0)
84	  error('x must be strictly monotone; pre-process to achieve this')
85  endif
86
87
88  [n m] = size(y); #should also be that n = numel(x);
89
90  if isempty(w)
91    w = ones(n, 1);
92  end
93
94
95  R = spdiags([h(2:end) 2*(h(1:end-1) + h(2:end)) h(1:end-1)], [-1 0 1], n-2, n-2);
96
97  QT = spdiags([1 ./ h(1:end-1) -(1 ./ h(1:end-1) + 1 ./ h(2:end)) 1 ./ h(2:end)], [0 1 2], n-2, n);
98
99## if not given, choose p so that trace(6*(1-p)*QT*diag(1 ./ w)*QT') = trace(p*R)
100  if isempty(p) || (p < 0)
101  	r = full(6*trace(QT*diag(1 ./ w)*QT') / trace(R));
102  	p = r ./ (1 + r);
103  endif
104
105## solve for the scaled second derivatives u and for the function values a at the knots
106## (if p = 1, a = y; if p = 0, cc(:) = dd(:) = 0)
107## QT*y can also be written as (y(3:n, :) - y(2:(n-1), :)) ./ h(2:end) - (y(2:(n-1), :) - y(1:(n-2), :)) ./ h(1:(end-1))
108  u = (6*(1-p)*QT*diag(1 ./ w)*QT' + p*R) \ (QT*y);
109  a = y - 6*(1-p)*diag(1 ./ w)*QT'*u;
110
111## derivatives for the piecewise cubic spline
112  aa = bb = cc = dd = zeros (n+1, m);
113  aa(2:end, :) = a;
114  cc(3:n, :) = 6*p*u; #second derivative at endpoints is 0 [natural spline]
115  dd(2:n, :) = diff(cc(2:(n+1), :)) ./ h;
116  bb(2:n, :) = diff(a) ./ h - (h/3) .* (cc(2:n, :) + cc(3:(n+1), :)/2);
117
118## add knots to either end of spline pp-form to ensure linear extrapolation
119  dx_minus = eps(x(1));
120  dx_plus = eps(x(end));
121  xminus = x(1) - dx_minus;
122  xplus = x(end) + dx_plus;
123  x = [xminus; x; xplus];
124  slope_minus = bb(2, :);
125  slope_plus = bb(n, :) + cc(n, :)*h(n-1) + (dd(n, :)/2)*h(n-1)^2;
126  bb(1, :) = slope_minus; #linear extension of splines
127  bb(n + 1, :) = slope_plus;
128  aa(1, :) = a(1, :) - dx_minus*bb(1, :);
129
130  ret = mkpp (x, cat (2, dd'(:)/6, cc'(:)/2, bb'(:), aa'(:)), m);
131  clear a aa bb cc dd slope_minus slope_plus u #these values are no longer needed
132
133  if ~isempty (xi)
134    ret = ppval (ret, xi);
135  endif
136
137  if (isargout (4) && isempty (xi))
138    unc_yi = [];
139  endif
140
141  if isargout (3) || (isargout (4) && ~isempty (xi)) || isargout (5)
142
143    if p == 1 #interpolation assumes no error in the given data
144      sigma2 = 0;
145      if isargout (4) && ~isempty (xi)
146        unc_yi = zeros(numel(xi), 1);
147      endif
148      df = n;
149      return
150    endif
151
152    [U D V] = svd (diag(1 ./ sqrt(w))*QT'*sqrtm(inv(R)), 0); D = diag(D).^2;
153    #influence matrix for given p
154    A = speye(n) - U * diag(D ./ (D + (p / (6*(1-p))))) * U';
155    A = diag (1 ./ sqrt(w)) * A * diag(sqrt(w)); #rescale to original units; a = A*y
156    MSR = mean (w .* (y - (A*y)) .^ 2); #mean square residual
157    df = trace (A);
158    sigma2 = mean (MSR(:)) * (n / (n-df)); #estimated data error variance (wahba83)
159
160    if isargout (4) && ~isempty (xi)
161      ni = numel (xi);
162     #dependence of spline values on each given point (to compute uncertainty)
163      C = 6 * p * full ((6*(1-p)*QT*diag(1 ./ w)*QT' + p*R) \ QT); #cc(3:n, :) = C*y [sparsity is lost]
164      D = diff ([zeros(n, 1) C' zeros(n, 1)]') ./ h; #dd(2:n, :) = D*y
165      B = diff (A) ./ h - (h/3) .* ([zeros(n, 1) C']' + [C' zeros(n, 1)]' / 2); #bb(2:n, :) = B*y
166    #add end-points
167      C = [zeros(n, 2) C' zeros(n, 1)]';
168      D = [zeros(n, 1) D' zeros(n, 1)]';
169      B = [B(1, :)' B' B(end, :)' + C(n, :)'*h(n-1) + (D(n, :)'/2)*h(n-1)^2]';
170      A = [A(1, :)'-eps(x(1))*B(1, :)' A']';
171    #sum the squared dependence on each data value y at each requested point xi
172      unc_yi = zeros (ni, 1);
173      for i = 1:n
174        unc_yi += (ppval (mkpp (x, cat (2, D(:, i)/6, C(:, i)/2, B(:, i), A(:, i))), xi(:))) .^ 2;
175      endfor
176      unc_yi = sqrt (sigma2 * unc_yi); #not exactly the same as unc_y as calculated in csaps_sel even if xi = x, but fairly close
177      endif
178  endif
179
180
181endfunction
182
183%!shared x,y,xi,yi,p,sigma2,unc_yi,df
184%! x = ([1:10 10.5 11.3])'; y = sin(x); xi = linspace(min(x), max(x), 30)';
185%!assert (csaps(x,y,1,x), y, 10*eps);
186%!assert (csaps(x,y,1,x'), y', 10*eps);
187%!assert (csaps(x',y',1,x'), y', 10*eps);
188%!assert (csaps(x',y',1,x), y, 10*eps);
189%!assert (csaps(x,[y 2*y],1,x)', [y 2*y], 10*eps);
190%! [yi,p,sigma2,unc_yi,df] = csaps(x,y,1,xi);
191%!assert (yi, ppval(csape(x, y, "variational"), xi), eps);
192%!assert (p, 1);
193%!assert (unc_yi, zeros(size(xi)));
194%!assert (sigma2, 0);
195%!assert (df, numel(x));
196%! [yi,p,~,~,df] = csaps(x,y,0,xi);
197%!assert (yi, polyval(polyfit(x, y, 1), xi), 10*eps);
198%!assert (p, 0);
199%!assert (df, 2, 100*eps);
200
201%{
202test weighted smoothing:
203n = 500;
204a = 0; b = pi;
205f = @(x) sin(x);
206x = a + (b-a)*sort(rand(n, 1));
207w = rand(n, 1);
208y = f(x) + randn(n, 1) ./ sqrt(w);
209xi = linspace(a, b, n)';
210yi_target = f(xi);
211[~,p_sel] = csaps_sel(x, y, xi, w, 1);
212[yi,~,sigma2,unc_yi] = csaps(x,y,p_sel,xi,w);
213rmse = rms((yi - yi_target));
214rmse_weighted = rms((yi - yi_target) ./ unc_yi);
215#worse results without the (correct) weighting:
216[~,p_sel] = csaps_sel(x, y, xi, []);
217[yi,~,sigma2,unc_yi] = csaps(x,y,p_sel,xi,[]);
218rmse_u = rms((yi - yi_target));
219rmse_u_weighted = rms((yi - yi_target) ./ unc_yi);
220%}
221