1% File src/library/stats/man/ppr.Rd 2% Part of the R package, https://www.R-project.org 3% Copyright 1995-2018 R Core Team 4% Distributed under GPL 2 or later 5 6% file stats/man/ppr.Rd 7% copyright (C) 1995-8 B. D. Ripley 8% copyright (C) 2000-3 The R Core Team 9 10\name{ppr} 11\alias{ppr} 12\alias{ppr.default} 13\alias{ppr.formula} 14\title{Projection Pursuit Regression} 15\description{ 16 Fit a projection pursuit regression model. 17} 18\usage{ 19ppr(x, \dots) 20 21\method{ppr}{formula}(formula, data, weights, subset, na.action, 22 contrasts = NULL, \dots, model = FALSE) 23 24\method{ppr}{default}(x, y, weights = rep(1, n), 25 ww = rep(1, q), nterms, max.terms = nterms, optlevel = 2, 26 sm.method = c("supsmu", "spline", "gcvspline"), 27 bass = 0, span = 0, df = 5, gcvpen = 1, trace = FALSE, \dots) 28} 29\arguments{ 30 \item{formula}{ 31 a formula specifying one or more numeric response variables and the 32 explanatory variables. 33 } 34 \item{x}{ 35 numeric matrix of explanatory variables. Rows represent observations, and 36 columns represent variables. Missing values are not accepted. 37 } 38 \item{y}{ 39 numeric matrix of response variables. Rows represent observations, and 40 columns represent variables. Missing values are not accepted. 41 } 42 \item{nterms}{number of terms to include in the final model.} 43 \item{data}{ 44 a data frame (or similar: see \code{\link{model.frame}}) from which 45 variables specified in \code{formula} are preferentially to be taken. 46 } 47 \item{weights}{a vector of weights \code{w_i} for each \emph{case}.} 48 \item{ww}{ 49 a vector of weights for each \emph{response}, so the fit criterion is 50 the sum over case \code{i} and responses \code{j} of 51 \code{w_i ww_j (y_ij - fit_ij)^2} divided by the sum of \code{w_i}. 52 } 53 \item{subset}{ 54 an index vector specifying the cases to be used in the training 55 sample. (NOTE: If given, this argument must be named.) 
56 } 57 \item{na.action}{ 58 a function to specify the action to be taken if \code{\link{NA}}s are 59 found. The default action is given by \code{getOption("na.action")}. 60 (NOTE: If given, this argument must be named.) 61 } 62 \item{contrasts}{ 63 the contrasts to be used when any factor explanatory variables are coded. 64 } 65 \item{max.terms}{ 66 maximum number of terms to choose from when building the model. 67 } 68 \item{optlevel}{ 69 integer from 0 to 3 which determines the thoroughness of an 70 optimization routine in the SMART program. See the \sQuote{Details} 71 section. 72 } 73 \item{sm.method}{ 74 the method used for smoothing the ridge functions. The default is 75 to use Friedman's super smoother \code{\link{supsmu}}. The 76 alternatives are to use the smoothing spline code underlying 77 \code{\link{smooth.spline}}, either with a specified (equivalent) 78 degrees of freedom for each ridge function, or to allow the 79 smoothness to be chosen by GCV. 80 81 Can be abbreviated. 82 } 83 \item{bass}{ 84 super smoother bass tone control used with automatic span selection 85 (see \code{supsmu}); the range of values is 0 to 10, with larger values 86 resulting in increased smoothing. 87 } 88 \item{span}{ 89 super smoother span control (see \code{\link{supsmu}}). The default, \code{0}, 90 results in automatic span selection by local cross validation. \code{span} 91 can also take a value in \code{(0, 1]}. 92 } 93 \item{df}{ 94 if \code{sm.method} is \code{"spline"} specifies the smoothness of 95 each ridge term via the requested equivalent degrees of freedom. 96 } 97 \item{gcvpen}{ 98 if \code{sm.method} is \code{"gcvspline"} this is the penalty used 99 in the GCV selection for each degree of freedom used. 
100 } 101 \item{trace}{logical indicating if each spline fit should produce 102 diagnostic output (about \code{lambda} and \code{df}), and the 103 supsmu fit about its steps.} 104 \item{\dots}{arguments to be passed to or from other methods.} 105 \item{model}{logical. If true, the model frame is returned.} 106} 107\value{ 108A list with the following components, many of which are for use by the 109method functions. 110 111\item{call}{the matched call} 112\item{p}{the number of explanatory variables (after any coding)} 113\item{q}{the number of response variables} 114\item{mu}{the argument \code{nterms}} 115\item{ml}{the argument \code{max.terms}} 116\item{gof}{the overall residual (weighted) sum of squares for the 117 selected model} 118\item{gofn}{the overall residual (weighted) sum of squares against the 119 number of terms, up to \code{max.terms}. Will be invalid (and zero) 120 for fewer than \code{nterms} terms.} 121\item{df}{the argument \code{df}} 122\item{edf}{if \code{sm.method} is \code{"spline"} or \code{"gcvspline"} 123 the equivalent number of degrees of freedom for each ridge term used.} 124\item{xnames}{the names of the explanatory variables} 125\item{ynames}{the names of the response variables} 126\item{alpha}{a matrix of the projection directions, with a column for 127 each ridge term} 128\item{beta}{a matrix of the coefficients applied for each response to 129 the ridge terms: the rows are the responses and the columns the ridge terms} 130\item{yb}{the weighted means of each response} 131\item{ys}{the overall scale factor used: internally the responses are 132 divided by \code{ys} to have unit total weighted sum of squares.} 133\item{fitted.values}{the fitted values, as a matrix if \code{q > 1}.} 134\item{residuals}{the residuals, as a matrix if \code{q > 1}.} 135\item{smod}{internal work array, which includes the ridge functions 136 evaluated at the training set points.} 137\item{model}{(only if \code{model = TRUE}) the model frame.} 138} 139\details{ 
140 The basic method is given by Friedman (1984), and is essentially the 141 same code used by S-PLUS's \code{ppreg}. This code is extremely 142 sensitive to the compiler used. 143 144 The algorithm first adds up to \code{max.terms} ridge terms one at a 145 time; it will use fewer if it is unable to find a term to add that makes 146 sufficient difference. It then removes the least 147 important term at each step until \code{nterms} terms 148 are left. 149 150 The levels of optimization (argument \code{optlevel}) 151 differ in how thoroughly the models are refitted during this process. 152 At level 0 the existing ridge terms are not refitted. At level 1 153 the projection directions are not refitted, but the ridge 154 functions and the regression coefficients are. 155% 156 Levels 2 and 3 refit all the terms and are equivalent for one 157 response; level 3 is more careful to re-balance the contributions 158 from each regressor at each step and so is a little less likely to 159 converge to a saddle point of the sum of squares criterion. 160} 161\source{ 162 Friedman (1984): converted to double precision and added interface to 163 smoothing splines by B. D. Ripley, originally for the \CRANpkg{MASS} 164 package. 165} 166 167\references{ 168 Friedman, J. H. and Stuetzle, W. (1981). 169 Projection pursuit regression. 170 \emph{Journal of the American Statistical Association}, 171 \bold{76}, 817--823. 172 \doi{10.2307/2287576}. 173 174 Friedman, J. H. (1984). 175 SMART User's Guide. 176 Laboratory for Computational Statistics, Stanford University Technical 177 Report No.\sspace{}1. 178 179 Venables, W. N. and Ripley, B. D. (2002). 180 \emph{Modern Applied Statistics with S}. 181 Springer. 
182} 183\seealso{ 184 \code{\link{plot.ppr}}, \code{\link{supsmu}}, \code{\link{smooth.spline}} 185} 186\examples{ 187require(graphics) 188 189# Note: your numerical values may differ 190attach(rock) 191area1 <- area/10000; peri1 <- peri/10000 192rock.ppr <- ppr(log(perm) ~ area1 + peri1 + shape, 193 data = rock, nterms = 2, max.terms = 5) 194rock.ppr 195# Call: 196# ppr.formula(formula = log(perm) ~ area1 + peri1 + shape, data = rock, 197# nterms = 2, max.terms = 5) 198# 199# Goodness of fit: 200# 2 terms 3 terms 4 terms 5 terms 201# 8.737806 5.289517 4.745799 4.490378 202 203summary(rock.ppr) 204# ..... (same as above) 205# ..... 206# 207# Projection direction vectors ('alpha'): 208# term 1 term 2 209# area1 0.34357179 0.37071027 210# peri1 -0.93781471 -0.61923542 211# shape 0.04961846 0.69218595 212# 213# Coefficients of ridge terms: 214# term 1 term 2 215# 1.6079271 0.5460971 216 217par(mfrow = c(3,2)) # maybe: , pty = "s") 218plot(rock.ppr, main = "ppr(log(perm)~ ., nterms=2, max.terms=5)") 219plot(update(rock.ppr, bass = 5), main = "update(..., bass = 5)") 220plot(update(rock.ppr, sm.method = "gcv", gcvpen = 2), 221 main = "update(..., sm.method=\"gcv\", gcvpen=2)") 222cbind(perm = rock$perm, prediction = round(exp(predict(rock.ppr)), 1)) 223detach() 224} 225\keyword{regression} 226 227