1\name{vglm}
2\alias{vglm}
3%\alias{vglm.fit}
4\title{Fitting Vector Generalized Linear Models }
5\description{
6  \code{vglm} fits vector generalized linear models (VGLMs).
7  This very large class of models includes
8  generalized linear models (GLMs) as a special case.
9
10
11}
12\usage{
13vglm(formula, family = stop("argument 'family' needs to be assigned"),
14     data = list(), weights = NULL, subset = NULL,
15     na.action = na.fail, etastart = NULL, mustart = NULL,
16     coefstart = NULL, control = vglm.control(...), offset = NULL,
17     method = "vglm.fit", model = FALSE, x.arg = TRUE, y.arg = TRUE,
18     contrasts = NULL, constraints = NULL, extra = list(),
19     form2 = NULL, qr.arg = TRUE, smart = TRUE, ...)
20}
21%- maybe also `usage' for other objects documented here.
22\arguments{
23
24  \item{formula}{
25  a symbolic description of the model to be fit.
26  The RHS of the formula is applied to each linear
27  predictor.
28  The effect of different variables in each linear predictor
29  can be controlled by specifying constraint matrices---see
30  \code{constraints} below.
31
32
33  }
34  \item{family}{
35  a function of class \code{"vglmff"} (see \code{\link{vglmff-class}})
36  describing what statistical model is to be fitted. This is called a
37  ``\pkg{VGAM} family function''.
38  See \code{\link{CommonVGAMffArguments}}
39  for general information about many types of arguments found in this
40  type of function.
41  The argument name \code{"family"} is used loosely and for
42  the ease of existing \code{\link[stats]{glm}} users;
43  there is no concept of a formal ``error distribution'' for VGLMs.
44  Possibly the argument name should be better \code{"model"}
45  but unfortunately
46  that name has already been taken.
47
48
49
50  }
51  \item{data}{
52  an optional data frame containing the variables in the model.
53  By default the variables are taken from
54  \code{environment(formula)}, typically the environment
55  from which \code{vglm} is called.
56
57
58
59
60  }
61  \item{weights}{
62  an optional vector or matrix of (prior fixed and known) weights
63  to be used in the fitting process.
64  If the \pkg{VGAM} family function handles multiple responses
65  (\eqn{Q > 1} of them, say) then
66  \code{weights} can be a matrix with \eqn{Q} columns.
67  Each column matches the respective response.
68  If it is a vector (the usually case) then it is recycled into a
69  matrix with \eqn{Q} columns.
70  The values of \code{weights} must be positive; try setting
71  a very small value such as \code{1.0e-8} to effectively
72  delete an observation.
73
74
75
76% 20201215:
77
78  Currently the \code{weights} argument supports sampling
79  weights from complex sampling designs
80  via \pkg{svyVGAM}.
81  Some details can be found at
82  \url{https://CRAN.R-project.org/package=svyVGAM}.
83
84
85
86% Creates CRAN problems:
87% \url{https://cran.r-project.org/web/packages/svyVGAM/vignettes/theory.html}.
88
89
90
91
92
93% 20140507:
94% Currently the \code{weights} argument does not support sampling
95% weights from complex sampling designs.
96% And currently sandwich estimators are not computed in any
97% shape or form.
98% The present weights are multiplied by the corresponding
99% log-likelihood contributions and added to form the
100% overall log-likelihood.
101
102
103
104
105
106
107% If \code{weights} is a matrix,
108% then it should be must be in \emph{matrix-band} form, whereby the
109% first \eqn{M} columns of the matrix are the diagonals,
110% followed by the upper-diagonal band, followed by the
111% band above that, etc. In this case, there can be up to
112% \eqn{M(M+1)} columns, with the last column corresponding
113% to the (1,\eqn{M}) elements of the weight matrices.
114
115
116  }
117  \item{subset}{
118  an optional logical vector specifying a subset of
119  observations to
120  be used in the fitting process.
121
122
123
124  }
125  \item{na.action}{
126  a function which indicates what should happen when
127  the data contain \code{NA}s.
128  The default is set by the \code{na.action} setting
129  of \code{\link[base]{options}}, and is \code{na.fail} if that
130  is unset.
131  The ``factory-fresh'' default is \code{na.omit}.
132
133
134  }
135  \item{etastart}{
136  optional starting values for the linear predictors.
137  It is a \eqn{M}-column matrix with the same number of rows as
138  the response.
139  If \eqn{M = 1} then it may be a vector.
140  Note that \code{etastart} and the output of \code{predict(fit)}
141  should be comparable.
142  Here, \code{fit} is the fitted object.
143  Almost all \pkg{VGAM} family functions are self-starting.
144
145
146
147  }
148  \item{mustart}{
149  optional starting values for the fitted values.
150  It can be a vector or a matrix;
151  if a matrix, then it has the same number of rows as the response.
152  Usually \code{mustart} and the output of \code{fitted(fit)}
153  should be comparable.
154  Most family functions do not make use of this argument
155  because it is not possible to compute all \eqn{M} columns of
156  \code{eta} from \code{mu}.
157
158
159
160  }
161  \item{coefstart}{
162  optional starting values for the coefficient vector.
163  The length and order must match that of \code{coef(fit)}.
164
165
166  }
167  \item{control}{
168  a list of parameters for controlling the fitting process.
169  See \code{\link{vglm.control}} for details.
170
171
172  }
173  \item{offset}{
174   a vector or \eqn{M}-column matrix of offset values.
175   These are \emph{a priori} known and are added to the
176   linear/additive predictors during fitting.
177
178
179  }
180  \item{method}{
181  the method to be used in fitting the model.  The default (and
182  presently only) method \code{vglm.fit()} uses iteratively
183  reweighted least squares (IRLS).
184
185
186  }
187  \item{model}{
188  a logical value indicating whether the
189  \emph{model frame}
190  should be assigned in the \code{model} slot.
191
192
193  }
194  \item{x.arg, y.arg}{
195  logical values indicating whether
196  the LM matrix and response vector/matrix used in the fitting
197  process should be assigned in the \code{x} and \code{y} slots.
198  Note that the model matrix is the LM matrix; to get the VGLM
199  matrix type \code{model.matrix(vglmfit)} where
200  \code{vglmfit} is a \code{vglm} object.
201
202
203  }
204  \item{contrasts}{
205  an optional list. See the \code{contrasts.arg}
206  of \code{\link{model.matrix.default}}.
207
208
209  }
210  \item{constraints}{
211  an optional \code{\link[base]{list}} of constraint matrices.
212  The components of the list must be named (labelled)
213  with the term it corresponds to
214  (and it must match in character format \emph{exactly}---see below).
215  There are two types of input: \code{"lm"}-type and \code{"vlm"}-type.
216  The former is a subset of the latter.
217  The former has a matrix for each term of the LM matrix.
218  The latter has a matrix for each column of the big VLM matrix.
219  After fitting, the \code{\link{constraints}}
220  extractor function may be applied; it returns
221  the \code{"vlm"}-type list of constraint matrices
222  by default.  If \code{"lm"}-type are returned by
223  \code{\link{constraints}} then these can be fed into this
224  argument and it should give the same model as before.
225
226
227
228  If the \code{constraints} argument is used then the
229  family function's \code{zero} argument (if it exists)
230  needs to be set to
231  \code{NULL}. This avoids what could be a probable contradiction.
232  Sometimes setting other arguments related to constraint
233  matrices to \code{FALSE} is also a good idea, e.g.,
234  \code{parallel = FALSE},
235  \code{exchangeable = FALSE}.
236
237
238
239  Properties:
240  each constraint matrix must have \eqn{M} rows, and be of
241  full-column rank.  By default, constraint matrices are
242  the \eqn{M} by \eqn{M} identity matrix unless arguments
243  in the family function itself override these values, e.g.,
244  \code{parallel} (see  \code{\link{CommonVGAMffArguments}}).
245  If \code{constraints} is used then it must contain \emph{all}
246  the terms; an incomplete list is not accepted.
247
248
249
250  As mentioned above, the labelling of each constraint matrix
251  must match exactly, e.g.,
252  \code{list("s(x2,df=3)"=diag(2))}
253  will fail as
254  \code{as.character(~ s(x2,df=3))} produces white spaces:
255  \code{"s(x2, df = 3)"}.
256  Thus
257  \code{list("s(x2, df = 3)" = diag(2))}
258  is needed.
259  See Example 6 below.
260  More details are given in Yee (2015; Section 3.3.1.3)
261  which is on p.101.
262  Note that the label for the intercept is \code{"(Intercept)"}.
263
264
265
266  }
267  \item{extra}{
268  an optional list with any extra information that might be needed by
269  the \pkg{VGAM} family function.
270
271
272  }
273  \item{form2}{
274  the second (optional) formula.
275  If argument \code{xij} is used (see \code{\link{vglm.control}}) then
276  \code{form2} needs to have \emph{all} terms in the model.
277  Also, some \pkg{VGAM} family functions such as \code{\link{micmen}}
278  use this argument to input the regressor variable.
279  If given, the slots \code{@Xm2} and \code{@Ym2} may be assigned.
280  Note that smart prediction applies to terms in \code{form2} too.
281
282
283  }
284  \item{qr.arg}{
285  logical value indicating whether the slot \code{qr}, which
286  returns the QR decomposition of the VLM model matrix,
287  is returned on the object.
288
289
290  }
291  \item{smart}{
292  logical value indicating whether smart prediction
293  (\code{\link{smartpred}}) will be used.
294
295
296  }
297  \item{\dots}{
298  further arguments passed into \code{\link{vglm.control}}.
299
300
301  }
302
303}
304\details{
305  A vector generalized linear model (VGLM) is loosely defined
306  as a statistical model that is a function of \eqn{M} linear
307  predictors and can be estimated by Fisher scoring.
308  The central formula is given by
309  \deqn{\eta_j = \beta_j^T x}{%
310         eta_j = beta_j^T x}
311  where \eqn{x}{x} is a vector of explanatory variables
312  (sometimes just a 1 for an intercept),
313  and
314  \eqn{\beta_j}{beta_j} is a vector of regression coefficients
315  to be estimated.
316  Here, \eqn{j=1,\ldots,M}, where \eqn{M} is finite.
317  Then one can write
318  \eqn{\eta=(\eta_1,\ldots,\eta_M)^T}{eta=(eta_1,\ldots,\eta_M)^T}
319  as a vector of linear predictors.
320
321
322  Most users will find \code{vglm} similar in flavour to
323  \code{\link[stats]{glm}}.
324  The function \code{vglm.fit} actually does the work.
325
326
327% If more than one of \code{etastart}, \code{start} and \code{mustart}
328% is specified, the first in the list will be used.
329
330
331
332}
333\value{
334  An object of class \code{"vglm"}, which has the
335  following slots. Some of these may not be assigned to save
336  space, and will be recreated if necessary later.
337  \item{extra}{the list \code{extra} at the end of fitting.}
338  \item{family}{the family function (of class \code{"vglmff"}).}
339  \item{iter}{the number of IRLS iterations used.}
340  \item{predictors}{a \eqn{M}-column matrix of linear predictors.}
341  \item{assign}{a named list which matches the columns and the
342       (LM) model matrix terms.}
343  \item{call}{the matched call.}
344  \item{coefficients}{a named vector of coefficients.}
345  \item{constraints}{
346    a named list of constraint matrices used in the fitting.
347  }
348  \item{contrasts}{the contrasts used (if any).}
349  \item{control}{list of control parameter used in the fitting.}
350  \item{criterion}{list of convergence criterion evaluated at the
351                   final IRLS iteration.}
352  \item{df.residual}{the residual degrees of freedom.}
353  \item{df.total}{the total degrees of freedom.}
354  \item{dispersion}{the scaling parameter.}
355  \item{effects}{the effects.}
356  \item{fitted.values}{
357  the fitted values, as a matrix.
358  This is often the mean but may be quantiles, or the location
359  parameter, e.g., in the Cauchy model.
360
361  }
362  \item{misc}{a list to hold miscellaneous parameters.}
363  \item{model}{the model frame.}
364  \item{na.action}{a list holding information about missing values.}
365  \item{offset}{if non-zero, a \eqn{M}-column matrix of offsets.}
366  \item{post}{a list where post-analysis results may be put.}
367  \item{preplot}{used by \code{\link{plotvgam}}, the plotting parameters
368        may be put here.}
369  \item{prior.weights}{
370  initially supplied weights
371  (the \code{weights} argument).
372  Also see \code{\link{weightsvglm}}.
373
374  }
375  \item{qr}{the QR decomposition used in the fitting.}
376  \item{R}{the \bold{R} matrix in the QR decomposition
377    used in the fitting.}
378  \item{rank}{numerical rank of the fitted model.}
379  \item{residuals}{the \emph{working} residuals at the
380    final IRLS iteration.}
381  \item{ResSS}{residual sum of squares at the final IRLS iteration with
382  the adjusted dependent vectors and weight matrices.}
383  \item{smart.prediction}{
384  a list of data-dependent parameters (if any)
385  that are used by smart prediction.
386
387  }
388  \item{terms}{the \code{\link[stats]{terms}} object used.}
389  \item{weights}{the working weight matrices at the final IRLS iteration.
390    This is in matrix-band form.}
391  \item{x}{the model matrix (linear model LM, not VGLM).}
392  \item{xlevels}{the levels of the factors, if any, used in fitting.}
393  \item{y}{the response, in matrix form.}
394
395
396  This slot information is repeated at \code{\link{vglm-class}}.
397
398
399}
400\references{
401
402
403Yee, T. W. (2015).
404Vector Generalized Linear and Additive Models:
405With an Implementation in R.
406New York, USA: \emph{Springer}.
407
408
409
410Yee, T. W. and Hastie, T. J. (2003).
411Reduced-rank vector generalized linear models.
412\emph{Statistical Modelling},
413\bold{3}, 15--41.
414
415
416Yee, T. W. and Wild, C. J. (1996).
417Vector generalized additive models.
418\emph{Journal of the Royal Statistical Society, Series B, Methodological},
419\bold{58}, 481--493.
420
421
422
423Yee, T. W. (2014).
424Reduced-rank vector generalized linear models with two linear predictors.
425\emph{Computational Statistics and Data Analysis},
426\bold{71}, 889--902.
427
428
429
430Yee, T. W. (2008).
431The \code{VGAM} Package.
432\emph{R News}, \bold{8}, 28--39.
433
434
435%  Documentation accompanying the \pkg{VGAM} package at
436%  \url{http://www.stat.auckland.ac.nz/~yee}
437%  contains further information and examples.
438
439
440}
441
442
443\author{ Thomas W. Yee }
444\note{
445  This function can fit a wide variety of statistical models. Some of
446  these are harder to fit than others because of inherent numerical
447  difficulties associated with some of them. Successful model fitting
448  benefits from cumulative experience. Varying the values of arguments
449  in the \pkg{VGAM} family function itself is a good first step if
450  difficulties arise, especially if initial values can be inputted.
451  A second, more general step, is to vary the values of arguments in
452  \code{\link{vglm.control}}.
453  A third step is to make use of arguments such as \code{etastart},
454  \code{coefstart} and \code{mustart}.
455
456
457
458  Some \pkg{VGAM} family functions end in \code{"ff"} to avoid
459  interference with other functions, e.g.,
460  \code{\link{binomialff}},
461  \code{\link{poissonff}}.
462  This is because \pkg{VGAM} family
463  functions are incompatible with \code{\link[stats]{glm}}
464  (and also \code{gam()} in \pkg{gam} and
465  \code{\link[mgcv]{gam}} in the \pkg{mgcv} library).
466
467
468
469% \code{gammaff}.
470% \code{\link{gaussianff}},
471
472
473
474  The smart prediction (\code{\link{smartpred}}) library is incorporated
475  within the \pkg{VGAM} library.
476
477
478
479  The theory behind the scaling parameter is currently being made more
480  rigorous, but it it should give the same value as the scale parameter
481  for GLMs.
482
483
484
485  In Example 5 below, the \code{xij} argument to illustrate covariates
486  that are specific to a linear predictor. Here, \code{lop}/\code{rop}
487  are
488  the ocular pressures of the left/right eye (artificial data).
489  Variables \code{leye} and \code{reye} might be the presence/absence of
490  a particular disease on the LHS/RHS eye respectively.
491  See
492  \code{\link{vglm.control}}
493  and
494  \code{\link{fill}}
495  for more details and examples.
496
497
498}
499
500%~Make other sections like WARNING with \section{WARNING }{....} ~
501\section{WARNING}{
502  See warnings in \code{\link{vglm.control}}.
503  Also, see warnings under \code{weights} above regarding
504  sampling weights from complex sampling designs.
505
506
507}
508
509
510
511\seealso{
512  \code{\link{vglm.control}},
513  \code{\link{vglm-class}},
514  \code{\link{vglmff-class}},
515  \code{\link{smartpred}},
516  \code{vglm.fit},
517  \code{\link{fill}},
518  \code{\link{rrvglm}},
519  \code{\link{vgam}}.
520  Methods functions include
521  \code{\link{add1.vglm}},
522  \code{\link{anova.vglm}},
523  \code{\link{AICvlm}},
524  \code{\link{coefvlm}},
525  \code{\link{confintvglm}},
526  \code{\link{constraints.vlm}},
527  \code{\link{drop1.vglm}},
528  \code{\link{fittedvlm}},
529  \code{\link{hatvaluesvlm}},
530  \code{\link{hdeff.vglm}},
531  \code{\link{linkfun.vglm}},
532  \code{\link{lrt.stat.vlm}},
533  \code{\link{score.stat.vlm}},
534  \code{\link{wald.stat.vlm}},
535  \code{\link{nobs.vlm}},
536  \code{\link{npred.vlm}},
537  \code{\link{plotvglm}},
538  \code{\link{predictvglm}},
539  \code{\link{residualsvglm}},
540  \code{\link{step4vglm}},
541  \code{\link{summaryvglm}},
542  \code{\link{lrtest_vglm}},
543  \code{\link[stats]{update}},
544  etc.
545
546
547}
548
549\examples{
550# Example 1. See help(glm)
551print(d.AD <- data.frame(treatment = gl(3, 3),
552                         outcome = gl(3, 1, 9),
553                         counts = c(18,17,15,20,10,20,25,13,12)))
554vglm.D93 <- vglm(counts ~ outcome + treatment, family = poissonff,
555                 data = d.AD, trace = TRUE)
556summary(vglm.D93)
557
558
559# Example 2. Multinomial logit model
560pneumo <- transform(pneumo, let = log(exposure.time))
561vglm(cbind(normal, mild, severe) ~ let, multinomial, data = pneumo)
562
563
564# Example 3. Proportional odds model
565fit3 <- vglm(cbind(normal, mild, severe) ~ let, propodds, data = pneumo)
566coef(fit3, matrix = TRUE)
567constraints(fit3)
568model.matrix(fit3, type = "lm")  # LM model matrix
569model.matrix(fit3)               # Larger VGLM (or VLM) model matrix
570
571
572# Example 4. Bivariate logistic model
573fit4 <- vglm(cbind(nBnW, nBW, BnW, BW) ~ age, binom2.or, coalminers)
574coef(fit4, matrix = TRUE)
575depvar(fit4)  # Response are proportions
576weights(fit4, type = "prior")
577
578
579# Example 5. The use of the xij argument (simple case).
580# The constraint matrix for 'op' has one column.
581nn <- 1000
582eyesdat <- round(data.frame(lop = runif(nn),
583                            rop = runif(nn),
584                             op = runif(nn)), digits = 2)
585eyesdat <- transform(eyesdat, eta1 = -1 + 2 * lop,
586                              eta2 = -1 + 2 * lop)
587eyesdat <- transform(eyesdat,
588           leye = rbinom(nn, size = 1, prob = logitlink(eta1, inverse = TRUE)),
589           reye = rbinom(nn, size = 1, prob = logitlink(eta2, inverse = TRUE)))
590head(eyesdat)
591fit5 <- vglm(cbind(leye, reye) ~ op,
592             binom2.or(exchangeable = TRUE, zero = 3),
593             data = eyesdat, trace = TRUE,
594             xij = list(op ~ lop + rop + fill(lop)),
595             form2 = ~  op + lop + rop + fill(lop))
596coef(fit5)
597coef(fit5, matrix = TRUE)
598constraints(fit5)
599fit5@control$xij
600head(model.matrix(fit5))
601
602
603# Example 6. The use of the 'constraints' argument.
604as.character(~ bs(year,df=3))  # Get the white spaces right
605clist <- list("(Intercept)"      = diag(3),
606              "bs(year, df = 3)" = rbind(1, 0, 0))
607fit1 <- vglm(r1 ~ bs(year,df=3), gev(zero = NULL),
608             data = venice, constraints = clist, trace = TRUE)
609coef(fit1, matrix = TRUE)  # Check
610}
611\keyword{models}
612\keyword{regression}
613
614%eyesdat$leye <- ifelse(runif(n) < 1/(1+exp(-1+2*eyesdat$lop)), 1, 0)
615%eyesdat$reye <- ifelse(runif(n) < 1/(1+exp(-1+2*eyesdat$rop)), 1, 0)
616%coef(fit, matrix = TRUE, compress = FALSE)
617
618
619
620% 20090506 zz Put these examples elsewhere:
621%
622%# Example 6. The use of the xij argument (complex case).
623%# Here is one method to handle the xij argument with a term that
624%# produces more than one column in the model matrix.
625%# The constraint matrix for 'op' has essentially one column.
626%POLY3 <- function(x, ...) {
627%    # A cubic; ensures that the basis functions are the same.
628%    poly(c(x,...), 3)[1:length(x),]
629%    head(poly(c(x,...), 3), length(x), drop = FALSE)
630%}
631%
632%fit6 <- vglm(cbind(leye, reye) ~ POLY3(op), trace = TRUE,
633%            fam = binom2.or(exchangeable = TRUE, zero=3),  data=eyesdat,
634%            xij = list(POLY3(op) ~ POLY3(lop,rop) + POLY3(rop,lop) +
635%                                   fill(POLY3(lop,rop))),
636%            form2 = ~  POLY3(op) + POLY3(lop,rop) + POLY3(rop,lop) +
637%                       fill(POLY3(lop,rop)))
638%coef(fit6)
639%coef(fit6, matrix = TRUE)
640%head(predict(fit6))
641%\dontrun{
642%plotvgam(fit6, se = TRUE)  # Wrong since it plots against op, not lop.
643%}
644%
645%
646%# Example 7. The use of the xij argument (simple case).
647%# Each constraint matrix has 4 columns.
648%ymat <- rdiric(n <- 1000, shape=c(4,7,3,1))
649%mydat <- data.frame(x1=runif(n), x2=runif(n), x3=runif(n), x4=runif(n),
650%                   z1=runif(n), z2=runif(n), z3=runif(n), z4=runif(n),
651%                   X2=runif(n), Z2=runif(n))
652%mydat <- round(mydat, dig=2)
653%fit7 <- vglm(ymat ~ X2 + Z2, data=mydat, crit="c",
654%           fam = dirichlet(parallel = TRUE),  # Intercept is also parallel.
655%           xij = list(Z2 ~ z1 + z2 + z3 + z4,
656%                      X2 ~ x1 + x2 + x3 + x4),
657%           form2 =  ~ Z2 + z1 + z2 + z3 + z4 +
658%                      X2 + x1 + x2 + x3 + x4)
659%head(model.matrix(fit7, type="lm"))   # LM model matrix
660%head(model.matrix(fit7, type="vlm"))  # Big VLM model matrix
661%coef(fit7)
662%coef(fit7, matrix = TRUE)
663%max(abs(predict(fit7)-predict(fit7, new=mydat)))  # Predicts correctly
664%summary(fit7)
665
666
667
668
669