1\name{modular}
2\title{Modular Functions for Mixed Model Fits}
3\alias{glFormula}
4\alias{lFormula}
5\alias{mkGlmerDevfun}
6\alias{mkLmerDevfun}
7\alias{modular}
8\alias{optimizeGlmer}
9\alias{optimizeLmer}
10\alias{updateGlmerDevfun}
11\usage{
12lFormula(formula, data = NULL, REML = TRUE,
13    subset, weights, na.action, offset, contrasts = NULL,
14    control = lmerControl(), ...)
15
16mkLmerDevfun(fr, X, reTrms, REML = TRUE, start = NULL,
17    verbose = 0, control = lmerControl(), ...)
18
19optimizeLmer(devfun,
20             optimizer    = formals(lmerControl)$optimizer,
21             restart_edge = formals(lmerControl)$restart_edge,
22             boundary.tol = formals(lmerControl)$boundary.tol,
23             start = NULL, verbose = 0L,
24             control = list(), ...)
25
26glFormula(formula, data = NULL, family = gaussian,
27    subset, weights, na.action, offset, contrasts = NULL,
28    start, mustart, etastart, control = glmerControl(), ...)
29
30mkGlmerDevfun(fr, X, reTrms, family, nAGQ = 1L,
31              verbose = 0L, maxit = 100L, control = glmerControl(), ...)
32
33optimizeGlmer(devfun,
34    optimizer = if(stage == 1) "bobyqa" else "Nelder_Mead",
35    restart_edge = FALSE,
36    boundary.tol = formals(glmerControl)$boundary.tol,
37    verbose = 0L, control = list(),
38    nAGQ = 1L, stage = 1, start = NULL, ...)
39
40updateGlmerDevfun(devfun, reTrms, nAGQ = 1L)
41}
42\arguments{
43  \item{formula}{a two-sided linear formula object
44    describing both the fixed-effects and random-effects parts
45    of the model, with the response on the left of a \code{~}
46    operator and the terms, separated by \code{+} operators,
47    on the right.  Random-effects terms are distinguished by
48    vertical bars (\code{"|"}) separating expressions for
49    design matrices from grouping factors.}
50
51  \item{data}{an optional data frame containing the
52    variables named in \code{formula}.  By default the
53    variables are taken from the environment from which
54    \code{lmer} is called. While \code{data} is optional, the
55    package authors \emph{strongly} recommend its use,
56    especially when later applying methods such as
57    \code{update} and \code{drop1} to the fitted model
58    (\emph{such methods are not guaranteed to work properly
59      if \code{data} is omitted}). If \code{data} is omitted,
60    variables will be taken from the environment of
61    \code{formula} (if specified as a formula) or from the
62    parent frame (if specified as a character vector).}
63
64  \item{REML}{(logical) indicating to fit \bold{re}stricted maximum
65    likelihood model.}
66
67  \item{subset}{an optional expression indicating the
68    subset of the rows of \code{data} that should be used in
69    the fit. This can be a logical vector, or a numeric
70    vector indicating which observation numbers are to be
71    included, or a character vector of the row names to be
72    included.  All observations are included by default.}
73
74  \item{weights}{an optional vector of \sQuote{prior weights} to be used
75    in the fitting process.  Should be \code{NULL} or a numeric vector.}
76
77  \item{na.action}{a function that indicates what should
78    happen when the data contain \code{NA}s.  The default
79    action (\code{na.omit}, inherited from the 'factory
80    fresh' value of \code{getOption("na.action")}) strips any
81    observations with any missing values in any variables.}
82
83  \item{offset}{this can be used to specify an \emph{a priori} known
84    component to be included in the linear predictor during
85    fitting.  This should be \code{NULL} or a numeric vector of length
86    equal to the number of cases.  One or more \code{\link{offset}}
87    terms can be included in the formula instead or as well, and if
88    more than one is specified their sum is used.  See
89    \code{\link{model.offset}}.}
90
91  \item{contrasts}{an optional \code{\link{list}}.  See the
92    \code{contrasts.arg} of \code{\link{model.matrix.default}}.}
93
94  \item{control}{a list giving
95    \describe{
96      \item{for \code{[g]lFormula}:}{all
97	options for running the model, see \code{\link{lmerControl}};}
98      \item{for \code{mkLmerDevfun,mkGlmerDevfun}:}{options
99	for the inner optimization step;}
100      \item{for \code{optimizeLmer} and \code{optimizeGlmer}:}{control
101	parameters for nonlinear optimizer (typically inherited from the
102	\dots argument to \code{\link{lmerControl}}).}
103	% FIXME: reference optCtrl
104    }
105  }
106  \item{fr}{A model frame containing the variables needed to create an
107    \code{\link{lmerResp}} or \code{\link{glmResp}} instance.}
108  \item{X}{fixed-effects design matrix}
109  \item{reTrms}{information on random effects structure (see
110    \code{\link{mkReTrms}}).}
111  \item{start}{starting values (see \code{\link{lmer}};
112    for \code{glFormula}, should be just a numeric vector of
113    fixed-effect coefficients)}
114  \item{verbose}{print output?}
115  \item{maxit}{maximal number of Pwrss update iterations.}
116  \item{devfun}{a deviance function, as generated by \code{\link{mkLmerDevfun}}}
117  \item{nAGQ}{number of Gauss-Hermite quadrature points}
118  \item{stage}{optimization stage (1: nAGQ=0, optimize over theta only;
119    2: nAGQ possibly >0, optimize over theta and beta)}
120  \item{optimizer}{character - name of optimizing
121    function(s).  A character vector or list of functions:
122    length 1 for \code{lmer} or \code{glmer}, possibly length
123    2 for \code{glmer}.  The built-in optimizers are
124    \code{"\link{Nelder_Mead}"} and \code{"\link[minqa]{bobyqa}"}
125    (from the \CRANpkg{minqa} package).  Any minimizing function
126    that allows box constraints can be used provided that it
127    \enumerate{
128      \item{takes input parameters \code{fn} (function to be
129	optimized), \code{par} (starting parameter values),
130	\code{lower} (lower bounds) and \code{control} (control
131	parameters, passed through from the \code{control}
132	argument) and}
133      \item{returns a list with (at least) elements
134	\code{par} (best-fit parameters), \code{fval} (best-fit
135	function value), \code{conv} (convergence code) and
136	(optionally) \code{message} (informational message, or
137	explanation of convergence failure)}.
138    }
139    Special provisions are made for \code{\link{bobyqa}},
140    \code{\link{Nelder_Mead}}, and optimizers wrapped in the
141    \CRANpkg{optimx} package; to use \pkg{optimx} optimizers
142    (including \code{L-BFGS-B} from base \code{\link{optim}}
143    and \code{\link{nlminb}}), pass the \code{method}
144    argument to \code{optim} in the \code{control} argument.
145
146    For \code{glmer}, if \code{length(optimizer)==2}, the
147    first element will be used for the preliminary (random
148    effects parameters only) optimization, while the second
149    will be used for the final (random effects plus fixed
150    effect parameters) phase. See \code{\link{modular}} for
151    more information on these two phases.
152  }
153  \item{restart_edge}{see \code{\link{lmerControl}}}
154  \item{boundary.tol}{see \code{\link{lmerControl}}}
155  \item{family}{a GLM family; see \code{\link[stats]{glm}}
156    and \code{\link[stats]{family}}.}
157  \item{mustart}{optional starting values on the scale of
158    the conditional mean; see \code{\link[stats]{glm}} for details.}
159  \item{etastart}{optional starting values on the scale of
160    the unbounded predictor; see \code{\link[stats]{glm}} for details.}
161  \item{\dots}{other potential arguments; for \code{optimizeLmer} and
162    \code{optimizeGlmer}, these are passed to internal function
163    \code{optwrap}, which has relevant parameters \code{calc.derivs}
164    and \code{use.last.params} (see \code{\link{lmerControl}}).}
165}
166\value{
167  \code{lFormula} and \code{glFormula} return a list containing
168  components:
169
170  \describe{
171    \item{fr}{model frame}
172     \item{X}{fixed-effect design matrix}
173     \item{reTrms}{list containing information on random effects structure:
174     result of \code{\link{mkReTrms}}}
175     \item{REML}{(lFormula only): logical indicating if restricted maximum
176     likelihood was used (Copy of argument.)}
177  }
178
179  \code{mkLmerDevfun} and \code{mkGlmerDevfun} return a function to
180      calculate deviance (or restricted deviance) as a function of the
181      theta (random-effect) parameters.  \code{updateGlmerDevfun}
182      returns a function to calculate the deviance as a function of a
183      concatenation of theta and beta (fixed-effect) parameters. These
184      deviance functions have an environment containing objects required
185      for their evaluation. CAUTION: The \code{\link{environment}} of
186      functions returned by \code{mk(Gl|L)merDevfun} contains reference
187      class objects (see \code{\link{ReferenceClasses}},
188      \code{\link{merPredD-class}}, \code{\link{lmResp-class}}), which
189      behave in ways that may surprise many users. For example, if the
190      output of \code{mk(Gl|L)merDevfun} is naively copied, then
191      modifications to the original will also appear in the copy (and
192      vice versa). To avoid this behavior one must make a deep copy (see
193      \code{\link{ReferenceClasses}} for details).
194
195  \code{optimizeLmer} and \code{optimizeGlmer} return the results of an
196  optimization.
197}
198\description{
199  Modular functions for mixed model fits
200}
201\details{
202  These functions make up the internal components of an [gn]lmer fit.
203  \itemize{
204    \item \code{[g]lFormula} takes the arguments that would normally be
205    passed to \code{[g]lmer}, checking for errors and processing the
206    formula and data input to create a list of objects required to fit a
207    mixed model.
208    \item \code{mk(Gl|L)merDevfun} takes the output of the previous
209    step (minus the \code{formula} component) and creates a
210    deviance function
211    \item \code{optimize(Gl|L)mer} takes a
212    deviance function and optimizes over \code{theta} (or
213    over \code{theta} and \code{beta}, if \code{stage} is set
214    to 2 for \code{optimizeGlmer}
215    \item \code{updateGlmerDevfun} takes the first stage of a GLMM
216    optimization (with \code{nAGQ=0}, optimizing over \code{theta} only)
217    and produces a second-stage deviance function
218    \item \code{\link{mkMerMod}} takes the \emph{environment} of a
219    deviance function, the results of an optimization, a list of
220    random-effect terms, a model frame, and a model all and produces a
221    \code{[g]lmerMod} object.
222  }
223}
224\examples{
225### Fitting a linear mixed model in 4 modularized steps
226
227## 1.  Parse the data and formula:
228lmod <- lFormula(Reaction ~ Days + (Days|Subject), sleepstudy)
229names(lmod)
230## 2.  Create the deviance function to be optimized:
231(devfun <- do.call(mkLmerDevfun, lmod))
232ls(environment(devfun)) # the environment of 'devfun' contains objects
233                        # required for its evaluation
234## 3.  Optimize the deviance function:
235opt <- optimizeLmer(devfun)
236opt[1:3]
237## 4.  Package up the results:
238mkMerMod(environment(devfun), opt, lmod$reTrms, fr = lmod$fr)
239
240
241### Same model in one line
242lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
243
244
245### Fitting a generalized linear mixed model in six modularized steps
246
247## 1.  Parse the data and formula:
248glmod <- glFormula(cbind(incidence, size - incidence) ~ period + (1 | herd),
249                   data = cbpp, family = binomial)
250    #.... see what've got :
251str(glmod, max=1, give.attr=FALSE)
252## 2.  Create the deviance function for optimizing over theta:
253(devfun <- do.call(mkGlmerDevfun, glmod))
254ls(environment(devfun)) # the environment of devfun contains lots of info
255## 3.  Optimize over theta using a rough approximation (i.e. nAGQ = 0):
256(opt <- optimizeGlmer(devfun))
257## 4.  Update the deviance function for optimizing over theta and beta:
258(devfun <- updateGlmerDevfun(devfun, glmod$reTrms))
259## 5.  Optimize over theta and beta:
260opt <- optimizeGlmer(devfun, stage=2)
261str(opt, max=1) # seeing what we'got
262## 6.  Package up the results:
263(fMod <- mkMerMod(environment(devfun), opt, glmod$reTrms, fr = glmod$fr))
264
265### Same model in one line
266fM <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
267            data = cbpp, family = binomial)
268all.equal(fMod, fM, check.attributes=FALSE, tolerance = 1e-12)
269        # ----  --  even tolerance = 0  may work
270}
271\keyword{models}
272
273