\name{negbinomial}
\alias{negbinomial}
\alias{polya}
\alias{polyaR}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{ Negative Binomial Distribution Family Function }
\description{
  Maximum likelihood estimation of the two parameters of a negative
  binomial distribution.

}
\usage{
negbinomial(zero = "size", parallel = FALSE, deviance.arg = FALSE,
            type.fitted = c("mean", "quantiles"),
            percentiles = c(25, 50, 75),
            mds.min = 1e-3, nsimEIM = 500, cutoff.prob = 0.999,
            eps.trig = 1e-7, max.support = 4000, max.chunk.MB = 30,
            lmu = "loglink", lsize = "loglink",
            imethod = 1, imu = NULL, iprobs.y = NULL,
            gprobs.y = ppoints(6), isize = NULL,
            gsize.mux = exp(c(-30, -20, -15, -10, -6:3)))
polya(zero = "size", type.fitted = c("mean", "prob"),
      mds.min = 1e-3, nsimEIM = 500, cutoff.prob = 0.999,
      eps.trig = 1e-7, max.support = 4000, max.chunk.MB = 30,
      lprob = "logitlink", lsize = "loglink", imethod = 1, iprob = NULL,
      iprobs.y = NULL, gprobs.y = ppoints(6), isize = NULL,
      gsize.mux = exp(c(-30, -20, -15, -10, -6:3)), imunb = NULL)
polyaR(zero = "size", type.fitted = c("mean", "prob"),
       mds.min = 1e-3, nsimEIM = 500, cutoff.prob = 0.999,
       eps.trig = 1e-7, max.support = 4000, max.chunk.MB = 30,
       lsize = "loglink", lprob = "logitlink", imethod = 1, iprob = NULL,
       iprobs.y = NULL, gprobs.y = ppoints(6), isize = NULL,
       gsize.mux = exp(c(-30, -20, -15, -10, -6:3)), imunb = NULL)
}

%     deviance.arg = FALSE,

%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{zero}{
    Can be an integer-valued vector, and if so, then
    it is usually assigned \eqn{-2}
  or \eqn{2}. Specifies which of the two
  linear/additive predictors are modelled as an intercept
  only. By default, the \eqn{k} parameter (after \code{lsize}
  is applied) is modelled as a single unknown number that
  is estimated. It can be modelled as a function of the
  explanatory variables by setting \code{zero = NULL}; this
  has been called an NB-H model by Hilbe (2011). A negative
  value means that the value is recycled, so setting \eqn{-2}
  means all \eqn{k} are intercept-only.
  See \code{\link{CommonVGAMffArguments}} for more information.
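
  As a minimal sketch (with a hypothetical data frame \code{mydat}
  containing \code{y} and \code{x2}), an NB-H may be requested
  as follows:
\preformatted{
fit.nb2 <- vglm(y ~ x2, negbinomial, data = mydat)  # k is intercept-only
fit.nbh <- vglm(y ~ x2, negbinomial(zero = NULL), data = mydat)  # NB-H
}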



% 20190119; getarg() fixes this problem:
% Because of the new labelling for \code{\link{nbcanlink}} the default
% is now \code{-2} rather than \code{"size"}; the latter is more
% understandable really.


  }
  \item{lmu, lsize, lprob}{
  Link functions applied to the \eqn{\mu}{mu}, \eqn{k}
  and \eqn{p} parameters.
  See \code{\link{Links}} for more choices.
  Note that the \eqn{\mu}{mu}, \eqn{k}
  and \eqn{p} parameters are the \code{mu},
  \code{size} and \code{prob} arguments of
  \code{\link[stats:NegBinomial]{rnbinom}} respectively.
  Common alternatives for \code{lsize} are
  \code{\link{negloglink}},
  \code{\link{reciprocallink}} and
  \code{\link{logloglink}} (if \eqn{k > 1}).


  }
  \item{imu, imunb, isize, iprob}{
  Optional initial values for the mean and \eqn{k} and \eqn{p}.
  For \eqn{k}, if failure to converge occurs then try different values
  (and/or use \code{imethod}).
  For an \eqn{S}-column response, \code{isize} can be of length \eqn{S}.
  A value \code{NULL} means an initial value for each response is
  computed internally using a gridsearch based on \code{gsize.mux}.
  The last argument is ignored if used within \code{\link{cqo}}; see
  the \code{iKvector} argument of \code{\link{qrrvglm.control}} instead.
  In the future \code{isize} and \code{iprob} might be deprecated.


  }

  \item{nsimEIM}{
  This argument is used
  for computing the diagonal element of the
  \emph{expected information matrix} (EIM) corresponding to \eqn{k}
  based on the \emph{simulated Fisher scoring} (SFS) algorithm.
  See \code{\link{CommonVGAMffArguments}} for more information
  and the notes below.
  SFS is one of two algorithms for computing the EIM elements
  (so that both algorithms may be used on a given data set).
  SFS is faster than the exact method when \code{Qmax} is large.



  }
  \item{cutoff.prob}{
  Fed into the \code{p} argument
  of \code{\link[stats:NegBinomial]{qnbinom}}
  in order to obtain an upper limit for the approximate
  support of the distribution, called \code{Qmax}, say.
  Similarly, the value \code{1-p} is
  fed into the \code{p} argument
  of \code{\link[stats:NegBinomial]{qnbinom}}
  in order to obtain a lower limit for the approximate
  support of the distribution, called \code{Qmin}, say.
  Hence the approximate support is \code{Qmin:Qmax}.
  This argument should be
  a numeric value close to 1, but never exactly 1.
  It specifies how many terms of the infinite series
  for computing the second diagonal element of the
  EIM are actually used.
  The closer this argument is to 1, the more accurate the
  standard errors of the regression coefficients will be.
  If this argument is too small then convergence will take longer.
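
  As an illustration of how \code{Qmin} and \code{Qmax} are
  obtained (the values of the mean and \code{size} here are
  arbitrary):
\preformatted{
cutoff.prob <- 0.999
Qmax <- qnbinom(cutoff.prob, size = 0.5, mu = 5)
Qmin <- qnbinom(1 - cutoff.prob, size = 0.5, mu = 5)
c(Qmin = Qmin, Qmax = Qmax)  # Approximate support is Qmin:Qmax
}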



% The sum of the probabilities are added until they reach
% at least this value.
% (but no more than \code{Maxiter} terms allowed).
% Used in the finite series approximation.
% It is like specifying \code{p} in an imaginary function \code{qnegbin(p)}.


  }
  \item{max.chunk.MB, max.support}{
    \code{max.support} is used to describe the eligibility of
    individual observations
    to have their EIM computed by the \emph{exact method}.
    Here, we are concerned with
  computing the EIM wrt \eqn{k}.
  The exact method algorithm operates separately on each response
  variable,
  and it constructs a large matrix provided that the number of columns
  is less than \code{max.support}.
  If so, then the computations are done in chunks, so
  that no more than about \code{max.chunk.MB} megabytes
  of memory is used at a time (actually, it is proportional to
  this amount).
  Regarding eligibility for this algorithm, each observation's
  approximate support (the vector of values running from
  the \code{1-cutoff.prob} quantile
  up to the \code{cutoff.prob} quantile)
  must have length less than \code{max.support}.
  If you have abundant memory then you might try setting
  \code{max.chunk.MB = Inf}, but then the computations might take
  a very long time.
  Setting \code{max.chunk.MB = 0} or \code{max.support = 0}
  will force the EIM to be computed using the SFS algorithm only
  (this \emph{used to be} the default method for \emph{all} the
  observations).
  When the fitted values of the model are large and \eqn{k} is small,
  the computation of the EIM will be costly with respect to time
  and memory if the exact method is used. Hence the argument
  \code{max.support} limits the cost in terms of time.
  For intercept-only models \code{max.support} is multiplied by
  a number (such as 10) because only one inner product needs to
  be computed.
  Note: \code{max.support} is an upper bound and limits the number of
  terms dictated by the \code{eps.trig} argument.


% Thus the number of columns of the matrix can be controlled by
% the argument \code{cutoff.prob}.

  }


\item{mds.min}{
Numeric.
Minimum value of the NBD mean divided by the \code{size} parameter.
The closer this ratio is to 0, the closer the distribution is
to a Poisson.
Iterations will stop when an estimate of \eqn{k} is so large,
relative to the mean, that this ratio falls below the threshold
(this is treated as a boundary of the parameter space).



  }


\item{eps.trig}{
Numeric.
A small positive value used in the computation of the EIMs.
It focuses on the denominator of the terms of a series.
Each term in the series (that is used to approximate an infinite
series) has a value greater than \code{size / sqrt(eps.trig)},
thus very small terms are ignored.
Setting a smaller value will result in more accuracy but
requires greater computing time (when \eqn{k} is close to 0).
An adjustment to \code{max.support} may also be needed.
In particular, the quantity computed by special means
is \eqn{\psi'(k) - E[\psi'(Y+k)]}{trigamma(k) - E[trigamma(Y+k)]},
which is the difference between two
\code{\link[base]{trigamma}} functions. It is part of the
calculation of the EIM with respect to the \code{size} parameter.
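
This quantity can be approximated by simulation; a small sketch
with arbitrary parameter values:
\preformatted{
set.seed(1)
k <- 0.5; mu <- 4
y <- rnbinom(1e5, size = k, mu = mu)
trigamma(k) - mean(trigamma(y + k))  # Approximates the above quantity
}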



}
\item{gsize.mux}{
  Similar to \code{gsigma} in \code{\link{CommonVGAMffArguments}}.
  However, this grid is multiplied by the initial
  estimates of the NBD mean parameter.
  That is, it is on a relative scale rather than on an
  absolute scale.
  If the counts are very large in value then convergence failure might
  occur; if so, then try a smaller value such as
  \code{gsize.mux = exp(-40)}.


}

% \item{Maxiter}{
% Used in the finite series approximation.
% Integer. The maximum number of terms allowed when computing
% the second diagonal element of the EIM.
% In theory, the value involves an infinite series.
% If this argument is too small then the value may be inaccurate.


% }




  \item{type.fitted, percentiles}{
  See \code{\link{CommonVGAMffArguments}} for more information.


  }



  \item{deviance.arg}{
  Logical.
  If \code{TRUE}, the deviance is computed \emph{after} convergence.
  It only works with the NB-2 model.
  It is also necessary to set \code{criterion = "coefficients"}
  or \code{half.step = FALSE}
  because the deviance criterion cannot be used properly
  for the minimization
  within the IRLS algorithm.
  It should be set \code{TRUE} when
  used with \code{\link{cqo}} under the fast algorithm.



% Pre-20131212:
% If \code{TRUE}, the deviance function is attached
% to the object. Under ordinary circumstances, it should be
% left alone because it really assumes the index parameter
% is at the maximum likelihood estimate. Consequently,
% one cannot use that criterion to minimize within the
% IRLS algorithm. It should be set \code{TRUE} only when
% used with \code{\link{cqo}} under the fast algorithm.


  }
  \item{imethod}{
  An integer with value \code{1} or \code{2} etc. which
  specifies the initialization method for the \eqn{\mu}{mu} parameter.
  If failure to converge occurs then try another value,
  and/or specify a value for \code{iprobs.y}
  and/or \code{isize}.


  }
  \item{parallel}{
  See \code{\link{CommonVGAMffArguments}} for more information.
  Setting \code{parallel = TRUE} is useful in order to get
  something similar to \code{\link[stats]{quasipoisson}} or
  what is known as NB-1.
  If \code{parallel = TRUE} then the parallelism constraint
  does not apply to any intercept term.
  You should also set \code{zero = NULL} if \code{parallel = TRUE} to
  avoid a conflict.
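
  For example, with a hypothetical data frame \code{mydat},
  an NB-1 can be requested by
\preformatted{
vglm(y ~ x2 + x3, negbinomial(parallel = TRUE, zero = NULL), data = mydat)
}
  (see also Example 4 below).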



  }
  \item{gprobs.y}{
    A vector representing a grid;
    passed into the \code{probs} argument
  of \code{\link[stats:quantile]{quantile}}
  when \code{imethod = 1} to obtain an initial value for the mean
  of each response. It is overwritten by any value of \code{iprobs.y}.


  }
  \item{iprobs.y}{
  Passed into the \code{probs} argument
  of \code{\link[stats:quantile]{quantile}}
  when \code{imethod = 1} to obtain an initial value for the mean
  of each response. It overwrites any value of \code{gprobs.y}.
  This argument might be deleted in the future.


  }
%  \item{ishrinkage}{
%  How much shrinkage is used when initializing \eqn{\mu}{mu}.
%  The value must be between 0 and 1 inclusive, and
%  a value of 0 means the individual response values are used,
%  and a value of 1 means the median or mean is used.
%  This argument is used in conjunction with \code{imethod}.
%  If convergence failure occurs try setting this argument to 1.
%  }


}
\details{
  The negative binomial distribution (NBD)
  can be motivated in several ways,
  e.g., as a Poisson distribution with a mean that is gamma
  distributed.
  There are several common parametrizations of the NBD.
  The one used by \code{negbinomial()} uses the
  mean \eqn{\mu}{mu} and an \emph{index} parameter
  \eqn{k}, both of which are positive.
  Specifically, the density of a random variable \eqn{Y} is
  \deqn{f(y;\mu,k) = {y + k - 1 \choose y} \,
    \left( \frac{\mu}{\mu+k} \right)^y\,
    \left( \frac{k}{k+\mu} \right)^k }{%
    f(y;mu,k) = C_{y}^{y + k - 1}
    [mu/(mu+k)]^y [k/(k+mu)]^k}
  where \eqn{y=0,1,2,\ldots},
  and \eqn{\mu > 0}{mu > 0} and \eqn{k > 0}.
  Note that the \emph{dispersion} parameter is
  \eqn{1/k}, so that as \eqn{k} approaches infinity the
  NBD approaches a Poisson distribution.
  The response has variance \eqn{Var(Y)=\mu+\mu^2/k}{Var(Y)=mu*(1+mu/k)}.
  When fitted, the \code{fitted.values} slot of the object contains
  the estimated value of the \eqn{\mu}{mu} parameter, i.e., of the mean
  \eqn{E(Y)}.
  It is common for some to use \eqn{\alpha=1/k}{alpha=1/k} as the
  ancillary or heterogeneity parameter;
  so common alternatives for \code{lsize} are
  \code{\link{negloglink}} and
  \code{\link{reciprocallink}}.
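
  This parameterization matches the \code{mu} argument of
  \code{\link[stats:NegBinomial]{dnbinom}}, as the following
  check illustrates (the parameter values are arbitrary):
\preformatted{
mu <- 2; k <- 0.5; y <- 0:4
dnbinom(y, size = k, mu = mu)                            # Built-in density
choose(y + k - 1, y) * (mu/(mu + k))^y * (k/(k + mu))^k  # By hand; equal
}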


  For \code{polya} the density is
  \deqn{f(y;p,k) = {y + k - 1 \choose y} \,
    \left( 1 - p \right)^y\,
    p^k }{%
    f(y;p,k) = C_{y}^{y + k - 1}
    [1 - p]^y p^k}
  where \eqn{y=0,1,2,\ldots},
  and \eqn{k > 0} and \eqn{0 < p < 1}{0 < p < 1}.
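
  This matches the \code{prob} argument of
  \code{\link[stats:NegBinomial]{dnbinom}}, e.g.,
  (arbitrary parameter values):
\preformatted{
p <- 0.4; k <- 2; y <- 0:4
dnbinom(y, size = k, prob = p)
choose(y + k - 1, y) * (1 - p)^y * p^k  # Same values
}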


  Family function \code{polyaR()} is the same as \code{polya()} except
  that the order of the two parameters is switched.
  The reason is that \code{polyaR()} tries to match
  \code{\link[stats:NegBinomial]{rnbinom}} closely
  in terms of the argument order, etc.
  Should the probability parameter be of primary interest,
  users will probably prefer \code{polya()} over
  \code{polyaR()}.
  Possibly \code{polyaR()} will be decommissioned one day.



  The NBD can be coerced into the
  classical GLM framework, with one of the parameters being
  of interest and the other treated as a nuisance/scale
  parameter (this is implemented in the \pkg{MASS} library). The
  \pkg{VGAM} family function \code{negbinomial()} treats both
  parameters on the same footing, and estimates them both
  by full maximum likelihood estimation.


%  SFS is employed as the default (see the \code{nsimEIM}
%  argument).


  The parameters \eqn{\mu}{mu} and \eqn{k} are independent
  (diagonal EIM), and the confidence region for \eqn{k}
  is extremely skewed so that its standard error is often
  of no practical use. The parameter \eqn{1/k} has been
  used as a measure of aggregation.
  For the NB-C the EIM is not diagonal.


  These \pkg{VGAM} family functions handle
  \emph{multiple} responses, so that a response matrix can be
  inputted. The number of columns is the number
  of species, say, and setting \code{zero = -2} means that
  \emph{all} species have a \eqn{k} equalling a (different)
  intercept only.


Conlisk et al. (2007) show that fitting the NBD to
presence-absence data will result in identifiability problems.
However, the model is identifiable if the response values include
0, 1 and 2.




% Solow and Smith (2010)





}
\section{Warning}{
  Poisson regression corresponds to \eqn{k} equalling
  infinity. If the data are Poisson or close to Poisson,
  numerical problems may occur.
  Some corrective measures are taken, e.g.,
  \eqn{k} is effectively capped (relative to the mean) during
  estimation to some large value, and a warning is issued.
  Setting \code{stepsize = 0.5} for half stepping is
  probably a good idea too when the data are extreme.


% Possibly setting \code{crit = "coef"} is a good idea because
% the log-likelihood is often a \code{NaN} when the \code{size}
% value is very large.


% Note that \code{dnbinom(0, mu, size = Inf)} currently
% is a \code{NaN} (a bug),
% therefore if the data has some 0s then
% setting \code{crit = "coef"} will avoid the problem that
% the log-likelihood will be undefined during the last
% stages of estimation.


% Possibly choosing a log-log link may help in such cases,
% otherwise try \code{\link{poissonff}} or
% \code{\link{quasipoissonff}}. It is possible to fit a NBD
% that has a similar variance function as a quasi-Poisson; see
% the NB-1 example below.



The NBD is a strictly unimodal
distribution. Any data set that does not exhibit a mode (somewhere
in the middle) makes the estimation problem difficult.
Set \code{trace = TRUE} to monitor convergence.





  These functions are fragile; maximum likelihood
  estimation of the index parameter is fraught with
  difficulties (see Lawless, 1987).
  Other alternatives to \code{negbinomial} are
  to fit an NB-1 or RR-NB (aka NB-P) model; see Yee (2014).
  Also available are the NB-C, NB-H and NB-G.
  Assigning values to the \code{isize} argument may lead
  to a local solution, and smaller values are preferred
  over large values when using this argument.



% In general, the \code{\link{quasipoissonff}} is more robust.



  If one wants to force SFS
  to be used on all observations, then
  set \code{max.support = 0} or \code{max.chunk.MB = 0}.
  If one wants to force the exact method
  to be used for all observations, then
  set \code{max.support = Inf}.
  If the computer has \emph{much} memory, then trying
  \code{max.chunk.MB = Inf} and
  \code{max.support = Inf}
  may provide a small speed increase.
  If SFS is used at all, then the \code{@weights} slot of the
  fitted object will be a matrix;
  otherwise that slot will be a \code{0 x 0} matrix.
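
  For example, the following calls sketch these settings:
\preformatted{
negbinomial(max.support = 0)    # Force SFS for all observations
negbinomial(max.support = Inf)  # Force the exact method throughout
}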



  An alternative to the NBD is the generalized Poisson distribution
  (\code{\link{genpoisson1}},
  \code{\link{genpoisson2}} and
  \code{\link{genpoisson0}}),
  since it also handles overdispersion with respect to the Poisson.
  It has one advantage in that its EIM can be computed
  straightforwardly.



  Yet to do: write a family function which uses the method
  of moments estimator for \eqn{k}.


}
\value{
  An object of class \code{"vglmff"} (see \code{\link{vglmff-class}}).
  The object is used by modelling functions such as \code{\link{vglm}},
  \code{\link{rrvglm}}
  and \code{\link{vgam}}.



}
\references{
Lawless, J. F. (1987).
Negative binomial and mixed Poisson regression.
\emph{The Canadian Journal of Statistics}
\bold{15}, 209--225.


Hilbe, J. M. (2011).
\emph{Negative Binomial Regression},
2nd Edition.
Cambridge: Cambridge University Press.


Bliss, C. and Fisher, R. A. (1953).
Fitting the negative binomial distribution to biological data.
\emph{Biometrics}
\bold{9}, 174--200.


Conlisk, E., Conlisk, J. and Harte, J. (2007).
The impossibility of estimating a negative binomial
clustering parameter from presence-absence data:
A comment on He and Gaston.
\emph{The American Naturalist}
\bold{170},
651--654.

%   number = {4},




Yee, T. W. (2014).
Reduced-rank vector generalized linear models with two linear predictors.
\emph{Computational Statistics and Data Analysis},
\bold{71}, 889--902.





Yee, T. W. (2020).
The \pkg{VGAM} package for negative binomial regression.
\emph{Australian \& New Zealand Journal of Statistics},
\bold{61},
in press.



}
\author{ Thomas W. Yee,
  with a lot of help from Victor Miranda
  to get it going with \code{\link{nbcanlink}} (NB-C).


}
\note{
% The \pkg{VGAM} package has a few other family functions for the
% negative binomial distribution. Currently, none of these others work
% very well.


  These 3 functions implement 2 common parameterizations
  of the negative binomial (NB). Some people call the
  NB with integer \eqn{k} the \emph{Pascal} distribution
  and the NB with real \eqn{k} the \emph{Polya} distribution,
  although this document does not make that distinction.
  The one matching the details of
  \code{\link[stats:NegBinomial]{rnbinom}} in terms of \eqn{p}
  and \eqn{k} is \code{polya()}.


  For \code{polya()} the code may fail when \eqn{p} is close
  to 0 or 1. It is not yet compatible with \code{\link{cqo}}
  or \code{\link{cao}}.



  Suppose the response is called \code{ymat}.
  For \code{negbinomial()}
  the diagonal element of the \emph{expected information matrix}
  (EIM) for parameter \eqn{k}
  involves an infinite series; consequently SFS
  (see \code{nsimEIM}) is used as the backup algorithm only.
  SFS should be better if \code{max(ymat)} is large,
  e.g., \code{max(ymat) > 1000},
  or if there are any outliers in \code{ymat}.
  The default algorithm involves a finite series approximation
  to the support \code{0:Inf};
  the arguments
  \code{max.chunk.MB},
  \code{max.support} and
  \code{cutoff.prob} are pertinent.


% \code{slope.mu},
% the arguments \code{Maxiter} and
% can be invoked by setting \code{nsimEIM = NULL}.




  Regardless of the algorithm used,
  convergence problems may occur, especially when the response has
  large outliers or is large in magnitude.
  If convergence failure occurs, try using arguments
  (in decreasing order of recommendation)
  \code{max.support},
  \code{nsimEIM},
  \code{cutoff.prob},
  \code{iprobs.y},
  \code{imethod},
  \code{isize},
  \code{zero},
  \code{max.chunk.MB}.


  The function \code{negbinomial} can be used by the fast algorithm in
  \code{\link{cqo}}; however, setting \code{eq.tolerances = TRUE} and
  \code{I.tolerances = FALSE} is recommended.


% For \code{\link{cqo}} and \code{\link{cao}}, taking the square-root
% of the response means (approximately) a \code{\link{poissonff}} family
% may be used on the transformed data.


% If the negative binomial family function \code{\link{negbinomial}}
% is used for \code{cqo} then set \code{negbinomial(deviance = TRUE)}
% is necessary. This means to minimize the deviance, which the fast
% algorithm can handle.



  In the first example below (Bliss and Fisher, 1953), from each of 6
  McIntosh apple trees in an orchard that had been sprayed, 25 leaves
  were randomly selected. On each of the leaves, the number of adult
  female European red mites was counted.



  There are two special uses of \code{negbinomial} for handling
  count data.
  Firstly,
  when used by \code{\link{rrvglm}} this
  results in a continuum of models in between and
  inclusive of quasi-Poisson and negative binomial regression.
  This is known as a reduced-rank negative binomial model
  \emph{(RR-NB)}.
  It fits a negative binomial log-linear regression with variance
  function
  \eqn{Var(Y)=\mu+\delta_1 \mu^{\delta_2}}{Var(Y) = mu + delta1 * mu^delta2}
  where \eqn{\delta_1}{delta1}
  and \eqn{\delta_2}{delta2}
  are parameters to be estimated by MLE.
  Confidence intervals are available for \eqn{\delta_2}{delta2},
  so one can decide whether the
  data are closer to quasi-Poisson or negative binomial, if either.
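
  A minimal RR-NB sketch, assuming a data frame \code{ndata} with
  response \code{y1} and covariate \code{x2} (cf. Example 2 below):
\preformatted{
rrnb <- rrvglm(y1 ~ x2, negbinomial(zero = NULL), data = ndata)
coef(rrnb, matrix = TRUE)
}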


  Secondly,
  the use of \code{negbinomial} with \code{parallel = TRUE}
  inside \code{\link{vglm}}
  can result in a model similar to \code{\link[stats]{quasipoisson}}.
  This is named the \emph{NB-1} model.
  The dispersion parameter is estimated by MLE whereas
  \code{\link[stats:glm]{glm}} uses the method of moments.
  In particular, it fits a negative binomial log-linear regression
  with variance function
  \eqn{Var(Y) = \phi_0   \mu}{Var(Y) = phi0 * mu}
  where \eqn{\phi_0}{phi0}
  is a parameter to be estimated by MLE.
  Confidence intervals are available for \eqn{\phi_0}{phi0}.


}

\seealso{
  \code{\link[stats]{quasipoisson}},
  \code{\link{poissonff}},
  \code{\link{zinegbinomial}},
  \code{\link{negbinomial.size}} (e.g., NB-G),
  \code{\link{nbcanlink}} (NB-C),
  \code{\link{posnegbinomial}},
  \code{\link{genpoisson1}},
  \code{\link{genpoisson2}},
  \code{\link{genpoisson0}},
  \code{\link{inv.binomial}},
  \code{\link[stats:NegBinomial]{NegBinomial}},
  \code{\link{nbordlink}},
  \code{\link{rrvglm}},
  \code{\link{cao}},
  \code{\link{cqo}},
  \code{\link{CommonVGAMffArguments}},
  \code{\link{simulate.vlm}},
  \code{\link[stats:ppoints]{ppoints}}.


% \code{\link[stats:NegBinomial]{rnbinom}},
% \code{\link[stats:NegBinomial]{qnbinom}}.
% \code{\link[MASS]{rnegbin}}.
% \code{\link{quasipoissonff}},


}
\examples{
# Example 1: apple tree data (Bliss and Fisher, 1953)
appletree <- data.frame(y = 0:7, w = c(70, 38, 17, 10, 9, 3, 2, 1))
fit <- vglm(y ~ 1, negbinomial(deviance = TRUE), data = appletree,
            weights = w, crit = "coef")  # Obtain the deviance
fit <- vglm(y ~ 1, negbinomial(deviance = TRUE), data = appletree,
            weights = w, half.step = FALSE)  # Alternative method
summary(fit)
coef(fit, matrix = TRUE)
Coef(fit)  # For intercept-only models
deviance(fit)  # NB2 only; needs 'crit = "coef"' & 'deviance = TRUE' above

# Example 2: simulated data with multiple responses
\dontrun{
ndata <- data.frame(x2 = runif(nn <- 200))
ndata <- transform(ndata, y1 = rnbinom(nn, mu = exp(3+x2), size = exp(1)),
                          y2 = rnbinom(nn, mu = exp(2-x2), size = exp(0)))
fit1 <- vglm(cbind(y1, y2) ~ x2, negbinomial, data = ndata, trace = TRUE)
coef(fit1, matrix = TRUE)
}

# Example 3: large counts imply SFS is used
\dontrun{
ndata <- transform(ndata, y3 = rnbinom(nn, mu = exp(10+x2), size = exp(1)))
with(ndata, range(y3))  # Large counts
fit2 <- vglm(y3 ~ x2, negbinomial, data = ndata, trace = TRUE)
coef(fit2, matrix = TRUE)
head(fit2@weights)  # Non-empty; SFS was used
}

# Example 4: an NB-1 to estimate a negative binomial with Var(Y) = phi0 * mu
nn <- 200  # Number of observations
phi0 <- 10  # Specify this; should be greater than unity
delta0 <- 1 / (phi0 - 1)
mydata <- data.frame(x2 = runif(nn), x3 = runif(nn))
mydata <- transform(mydata, mu = exp(2 + 3 * x2 + 0 * x3))
mydata <- transform(mydata, y3 = rnbinom(nn, mu = mu, size = delta0 * mu))
\dontrun{
plot(y3 ~ x2, data = mydata, pch = "+", col = "blue",
     main = paste("Var(Y) = ", phi0, " * mu", sep = ""), las = 1) }
nb1 <- vglm(y3 ~ x2 + x3, negbinomial(parallel = TRUE, zero = NULL),
            data = mydata, trace = TRUE)
# Extracting out some quantities:
cnb1 <- coef(nb1, matrix = TRUE)
mydiff <- (cnb1["(Intercept)", "loglink(size)"] -
           cnb1["(Intercept)", "loglink(mu)"])
delta0.hat <- exp(mydiff)
(phi.hat <- 1 + 1 / delta0.hat)  # MLE of phi0
summary(nb1)
# Obtain a 95 percent confidence interval for phi0:
myvec <- rbind(-1, 1, 0, 0)
(se.mydiff <- sqrt(t(myvec) \%*\% vcov(nb1) \%*\% myvec))
ci.mydiff <- mydiff + c(-1.96, 1.96) * c(se.mydiff)
ci.delta0 <- ci.exp.mydiff <- exp(ci.mydiff)
(ci.phi0 <- 1 + 1 / rev(ci.delta0))  # The 95 percent conf. interval for phi0

Confint.nb1(nb1)  # Quick way to get it

summary(glm(y3 ~ x2 + x3, quasipoisson, mydata))$disper  # cf. moment estimator
}
\keyword{models}
\keyword{regression}



%lmu = "loglink", lsize = "loglink",
%            imu = NULL, isize = NULL,
%            nsimEIM = 250, cutoff.prob = 0.999,
%            max.support = 2000, max.chunk.MB = 30,
%            deviance.arg = FALSE, imethod = 1,
%            probs.y = 0.75, ishrinkage = 0.95,
%            gsize = exp((-4):4),
%            parallel = FALSE, ishrinkage = 0.95, zero = "size")



%polya(lprob = "logitlink", lsize = "loglink",
%      iprob = NULL, isize = NULL, probs.y = 0.75, nsimEIM = 100,
%      imethod = 1, ishrinkage = 0.95, zero = "size")
%polyaR(lsize = "loglink", lprob = "logitlink",
%       isize = NULL, iprob = NULL, probs.y = 0.75, nsimEIM = 100,
%       imethod = 1, ishrinkage = 0.95, zero = "size")
