1% File src/library/stats/man/step.Rd
2% Part of the R package, https://www.R-project.org
3% Copyright 1995-2014 R Core Team
4% Distributed under GPL 2 or later
5
6\name{step}
7\alias{step}
8\title{
9Choose a Model by AIC in a Stepwise Algorithm
10}
11\description{
12  Select a formula-based model by AIC.
13}
14\usage{
15step(object, scope, scale = 0,
16     direction = c("both", "backward", "forward"),
17     trace = 1, keep = NULL, steps = 1000, k = 2, \dots)
18}
19\arguments{
20  \item{object}{
21    an object representing a model of an appropriate class (mainly
22    \code{"lm"} and \code{"glm"}).
23    This is used as the initial model in the stepwise search.
24  }
25  \item{scope}{
26    defines the range of models examined in the stepwise search.
27    This should be either a single formula, or a list containing
28    components \code{upper} and \code{lower}, both formulae.  See the
29    details for how to specify the formulae and how they are used.
30  }
31  \item{scale}{
32    used in the definition of the AIC statistic for selecting the models,
33    currently only for \code{\link{lm}}, \code{\link{aov}} and
34    \code{\link{glm}} models.  The default value, \code{0}, indicates
35    the scale should be estimated: see \code{\link{extractAIC}}.
36  }
37  \item{direction}{
38    the mode of stepwise search, can be one of \code{"both"},
39    \code{"backward"}, or \code{"forward"}, with a default of \code{"both"}.
40    If the \code{scope} argument is missing the default for
41    \code{direction} is \code{"backward"}.  Values can be abbreviated.
42  }
43  \item{trace}{
44    if positive, information is printed during the running of \code{step}.
45    Larger values may give more detailed information.
46  }
47  \item{keep}{
48    a filter function whose input is a fitted model object and the
49    associated \code{AIC} statistic, and whose output is arbitrary.
50    Typically \code{keep} will select a subset of the components of
51    the object and return them. The default is not to keep anything.
52  }
53  \item{steps}{
54    the maximum number of steps to be considered.  The default is 1000
55    (essentially as many as required).  It is typically used to stop the
56    process early.
57  }
58  \item{k}{
59    the multiple of the number of degrees of freedom used for the penalty.
60    Only \code{k = 2} gives the genuine AIC: \code{k = log(n)} is sometimes
61    referred to as BIC or SBC.
62  }
63  \item{\dots}{
64    any additional arguments to \code{\link{extractAIC}}.
65  }
66}
67\value{
68  the stepwise-selected model is returned, with up to two additional
69  components.  There is an \code{"anova"} component corresponding to the
70  steps taken in the search, as well as a \code{"keep"} component if the
71  \code{keep=} argument was supplied in the call. The
72  \code{"Resid. Dev"} column of the analysis of deviance table refers
73  to a constant minus twice the maximized log likelihood: it will be a
74  deviance only in cases where a saturated model is well-defined
75  (thus excluding \code{lm}, \code{aov} and \code{survreg} fits,
76  for example).
77}
78\details{
79  \code{step} uses \code{\link{add1}} and \code{\link{drop1}}
80  repeatedly; it will work for any method for which they work, and that
81  is determined by having a valid method for \code{\link{extractAIC}}.
82  When the additive constant can be chosen so that AIC is equal to
83  Mallows' \eqn{C_p}{Cp}, this is done and the tables are labelled
84  appropriately.
85
86  The set of models searched is determined by the \code{scope} argument.
87  The right-hand-side of its \code{lower} component is always included
88  in the model, and the right-hand-side of the model is included in the
89  \code{upper} component.  If \code{scope} is a single formula, it
90  specifies the \code{upper} component, and the \code{lower} model is
91  empty.  If \code{scope} is missing, the initial model is used as the
92  \code{upper} model.
93
94  Models specified by \code{scope} can be templates to update
95  \code{object} as used by \code{\link{update.formula}}.  So using
96  \code{.} in a \code{scope} formula means \sQuote{what is
97  already there}, with \code{.^2} indicating all interactions of
98  existing terms.
99
100  There is a potential problem in using \code{\link{glm}} fits with a
101  variable \code{scale}, as in that case the deviance is not simply
102  related to the maximized log-likelihood.  The \code{"glm"} method for
103  function \code{\link{extractAIC}} makes the
104  appropriate adjustment for a \code{gaussian} family, but may need to be
105  amended for other cases.  (The \code{binomial} and \code{poisson}
106  families have fixed \code{scale} by default and do not correspond
107  to a particular maximum-likelihood problem for variable \code{scale}.)
108}
109\note{
110  This function differs considerably from the function in S, which uses a
111  number of approximations and does not in general compute the correct AIC.
112
113  This is a minimal implementation.  Use \code{\link[MASS]{stepAIC}}
114  in package \CRANpkg{MASS} for a wider range of object classes.
115}
116\section{Warning}{
117  The model fitting must apply the models to the same dataset. This
118  may be a problem if there are missing values and \R's default of
119  \code{na.action = na.omit} is used.  We suggest you remove the
120  missing values first.
121
122  Calls to the function \code{\link{nobs}} are used to check that the
123  number of observations involved in the fitting process remains unchanged.
124}
125\seealso{
126  \code{\link[MASS]{stepAIC}} in \CRANpkg{MASS}, \code{\link{add1}},
127  \code{\link{drop1}}
128}
129\references{
130  Hastie, T. J. and Pregibon, D. (1992)
131  \emph{Generalized linear models.}
132  Chapter 6 of \emph{Statistical Models in S}
133  eds J. M. Chambers and T. J. Hastie, Wadsworth & Brooks/Cole.
134
135  Venables, W. N. and Ripley, B. D. (2002)
136  \emph{Modern Applied Statistics with S.}
137  New York: Springer (4th ed).
138}
139\author{
140  B. D. Ripley: \code{step} is a slightly simplified version of
141  \code{\link[MASS]{stepAIC}} in package \CRANpkg{MASS} (Venables &
142  Ripley, 2002 and earlier editions).
143
144  The idea of a \code{step} function follows that described in Hastie &
145  Pregibon (1992); but the implementation in \R is more general.
146}
147\examples{\donttest{
148## following on from example(lm)
149\dontshow{utils::example("lm", echo = FALSE)}
150step(lm.D9)
151
152summary(lm1 <- lm(Fertility ~ ., data = swiss))
153slm1 <- step(lm1)
154summary(slm1)
155slm1$anova
156}}
157\keyword{models}
158