1% File src/library/stats/man/ppr.Rd
2% Part of the R package, https://www.R-project.org
3% Copyright 1995-2018 R Core Team
4% Distributed under GPL 2 or later
5
6% file stats/man/ppr.Rd
7% copyright (C) 1995-8 B. D. Ripley
8% copyright (C) 2000-3   The R Core Team
9
10\name{ppr}
11\alias{ppr}
12\alias{ppr.default}
13\alias{ppr.formula}
14\title{Projection Pursuit Regression}
15\description{
16  Fit a projection pursuit regression model.
17}
18\usage{
19ppr(x, \dots)
20
21\method{ppr}{formula}(formula, data, weights, subset, na.action,
22    contrasts = NULL, \dots, model = FALSE)
23
24\method{ppr}{default}(x, y, weights = rep(1, n),
25    ww = rep(1, q), nterms, max.terms = nterms, optlevel = 2,
26    sm.method = c("supsmu", "spline", "gcvspline"),
27    bass = 0, span = 0, df = 5, gcvpen = 1, trace = FALSE, \dots)
28}
29\arguments{
30  \item{formula}{
31    a formula specifying one or more numeric response variables and the
32    explanatory variables.
33  }
34  \item{x}{
35    numeric matrix of explanatory variables.  Rows represent observations, and
36    columns represent variables.  Missing values are not accepted.
37  }
38  \item{y}{
39    numeric matrix of response variables.  Rows represent observations, and
40    columns represent variables.  Missing values are not accepted.
41  }
42  \item{nterms}{number of terms to include in the final model.}
43  \item{data}{
44    a data frame (or similar: see \code{\link{model.frame}}) from which
45    variables specified in \code{formula} are preferentially to be taken.
46  }
47  \item{weights}{a vector of weights \code{w_i} for each \emph{case}.}
48  \item{ww}{
49    a vector of weights for each \emph{response}, so the fit criterion is
50    the sum over case \code{i} and responses \code{j} of
51    \code{w_i ww_j (y_ij - fit_ij)^2} divided by the sum of \code{w_i}.
52  }
53  \item{subset}{
54    an index vector specifying the cases to be used in the training
55    sample.  (NOTE: If given, this argument must be named.)
56  }
57  \item{na.action}{
58    a function to specify the action to be taken if \code{\link{NA}}s are
59    found. The default action is given by \code{getOption("na.action")}.
60    (NOTE: If given, this argument must be named.)
61  }
62  \item{contrasts}{
63    the contrasts to be used when any factor explanatory variables are coded.
64  }
65  \item{max.terms}{
66    maximum number of terms to choose from when building the model.
67  }
68  \item{optlevel}{
69    integer from 0 to 3 which determines the thoroughness of an
70    optimization routine in the SMART program. See the \sQuote{Details}
71    section.
72  }
73  \item{sm.method}{
74    the method used for smoothing the ridge functions.  The default is
75    to use Friedman's super smoother \code{\link{supsmu}}.  The
76    alternatives are to use the smoothing spline code underlying
77    \code{\link{smooth.spline}}, either with a specified (equivalent)
    degrees of freedom for each ridge function, or to allow the
79    smoothness to be chosen by GCV.
80
81    Can be abbreviated.
82  }
83  \item{bass}{
84    super smoother bass tone control used with automatic span selection
85    (see \code{supsmu}); the range of values is 0 to 10, with larger values
86    resulting in increased smoothing.
87  }
88  \item{span}{
89    super smoother span control (see \code{\link{supsmu}}).  The default, \code{0},
90    results in automatic span selection by local cross validation. \code{span}
91    can also take a value in \code{(0, 1]}.
92  }
93  \item{df}{
94    if \code{sm.method} is \code{"spline"} specifies the smoothness of
95    each ridge term via the requested equivalent degrees of freedom.
96  }
97  \item{gcvpen}{
98    if \code{sm.method} is \code{"gcvspline"} this is the penalty used
99    in the GCV selection for each degree of freedom used.
100  }
101  \item{trace}{logical indicating if each spline fit should produce
102    diagnostic output (about \code{lambda} and \code{df}), and the
103    supsmu fit about its steps.}
104  \item{\dots}{arguments to be passed to or from other methods.}
105  \item{model}{logical.  If true, the model frame is returned.}
106}
107\value{
108A list with the following components, many of which are for use by the
109method functions.
110
111\item{call}{the matched call}
112\item{p}{the number of explanatory variables (after any coding)}
113\item{q}{the number of response variables}
114\item{mu}{the argument \code{nterms}}
115\item{ml}{the argument \code{max.terms}}
116\item{gof}{the overall residual (weighted) sum of squares for the
117  selected model}
118\item{gofn}{the overall residual (weighted) sum of squares against the
  number of terms, up to \code{max.terms}.  Entries for fewer than
  \code{nterms} terms are invalid (and zero).}
121\item{df}{the argument \code{df}}
122\item{edf}{if \code{sm.method} is \code{"spline"} or \code{"gcvspline"}
123  the equivalent number of degrees of freedom for each ridge term used.}
124\item{xnames}{the names of the explanatory variables}
125\item{ynames}{the names of the response variables}
126\item{alpha}{a matrix of the projection directions, with a column for
127  each ridge term}
128\item{beta}{a matrix of the coefficients applied for each response to
129  the ridge terms: the rows are the responses and the columns the ridge terms}
130\item{yb}{the weighted means of each response}
131\item{ys}{the overall scale factor used: internally the responses are
132  divided by \code{ys} to have unit total weighted sum of squares.}
133\item{fitted.values}{the fitted values, as a matrix if \code{q > 1}.}
134\item{residuals}{the residuals, as a matrix if \code{q > 1}.}
135\item{smod}{internal work array, which includes the ridge functions
136  evaluated at the training set points.}
137\item{model}{(only if \code{model = TRUE}) the model frame.}
138}
139\details{
140  The basic method is given by Friedman (1984), and is essentially the
141  same code used by S-PLUS's \code{ppreg}.  This code is extremely
142  sensitive to the compiler used.
143
144  The algorithm first adds up to \code{max.terms} ridge terms one at a
  time; it will use fewer if it is unable to find a term to add that makes
146  sufficient difference.  It then removes the least
147  important term at each step until \code{nterms} terms
148  are left.
149
150  The levels of optimization (argument \code{optlevel})
151  differ in how thoroughly the models are refitted during this process.
152  At level 0 the existing ridge terms are not refitted.  At level 1
153  the projection directions are not refitted, but the ridge
154  functions and the regression coefficients are.
155%
156  Levels 2 and 3 refit all the terms and are equivalent for one
157  response; level 3 is more careful to re-balance the contributions
158  from each regressor at each step and so is a little less likely to
159  converge to a saddle point of the sum of squares criterion.
160}
161\source{
162  Friedman (1984): converted to double precision and added interface to
163  smoothing splines by B. D. Ripley, originally for the \CRANpkg{MASS}
164  package.
165}
166
167\references{
168  Friedman, J. H. and Stuetzle, W. (1981).
169  Projection pursuit regression.
170  \emph{Journal of the American Statistical Association},
171  \bold{76}, 817--823.
172  \doi{10.2307/2287576}.
173
174  Friedman, J. H. (1984).
175  SMART User's Guide.
176  Laboratory for Computational Statistics, Stanford University Technical
177  Report No.\sspace{}1.
178
179  Venables, W. N. and Ripley, B. D. (2002).
180  \emph{Modern Applied Statistics with S}.
181  Springer.
182}
183\seealso{
184  \code{\link{plot.ppr}}, \code{\link{supsmu}}, \code{\link{smooth.spline}}
185}
186\examples{
require(graphics)

# Note: your numerical values may differ
# Rescale area and perimeter without attaching the data frame: the
# formula finds area1 and peri1 in the calling environment, while perm
# and shape are taken from the data argument.
area1 <- rock$area / 10000
peri1 <- rock$peri / 10000
rock.ppr <- ppr(log(perm) ~ area1 + peri1 + shape,
                data = rock, nterms = 2, max.terms = 5)
rock.ppr
# Call:
# ppr.formula(formula = log(perm) ~ area1 + peri1 + shape, data = rock,
#     nterms = 2, max.terms = 5)
#
# Goodness of fit:
#  2 terms  3 terms  4 terms  5 terms
# 8.737806 5.289517 4.745799 4.490378

summary(rock.ppr)
# .....  (same as above)
# .....
#
# Projection direction vectors ('alpha'):
#       term 1      term 2
# area1  0.34357179  0.37071027
# peri1 -0.93781471 -0.61923542
# shape  0.04961846  0.69218595
#
# Coefficients of ridge terms:
#    term 1    term 2
# 1.6079271 0.5460971

# Save the previous graphics settings so they can be restored afterwards.
op <- par(mfrow = c(3,2))   # maybe: , pty = "s")
plot(rock.ppr, main = "ppr(log(perm)~ ., nterms=2, max.terms=5)")
plot(update(rock.ppr, bass = 5), main = "update(..., bass = 5)")
plot(update(rock.ppr, sm.method = "gcv", gcvpen = 2),
     main = "update(..., sm.method=\"gcv\", gcvpen=2)")
par(op)

# Compare observed permeability with back-transformed fitted values.
cbind(perm = rock$perm, prediction = round(exp(predict(rock.ppr)), 1))
224}
225\keyword{regression}
226
227