1% File src/library/stats/man/Hypergeometric.Rd
2% Part of the R package, https://www.R-project.org
3% Copyright 1995-2020 R Core Team
4% Distributed under GPL 2 or later
5
6\name{Hypergeometric}
7\alias{Hypergeometric}
8\alias{dhyper}
9\alias{phyper}
10\alias{qhyper}
11\alias{rhyper}
12\title{The Hypergeometric Distribution}
13\description{
14  Density, distribution function, quantile function and random
15  generation for the hypergeometric distribution.
16}
17\usage{
18dhyper(x, m, n, k, log = FALSE)
19phyper(q, m, n, k, lower.tail = TRUE, log.p = FALSE)
20qhyper(p, m, n, k, lower.tail = TRUE, log.p = FALSE)
21rhyper(nn, m, n, k)
22}
23\arguments{
24  \item{x, q}{vector of quantiles representing the number of white balls
25    drawn without replacement from an urn which contains both black and
26    white balls.}
27  \item{m}{the number of white balls in the urn.}
28  \item{n}{the number of black balls in the urn.}
29  \item{k}{the number of balls drawn from the urn, hence must be in
30    \eqn{0,1,\dots, m+n}.}
31  \item{p}{vector of probabilities; each must be between 0 and 1.}
32  \item{nn}{number of observations.  If \code{length(nn) > 1}, the length
33    is taken to be the number required.}
34  \item{log, log.p}{logical; if TRUE, probabilities p are given as log(p).}
35  \item{lower.tail}{logical; if TRUE (default), probabilities are
36    \eqn{P[X \le x]}, otherwise, \eqn{P[X > x]}.}
37}
38\value{
39  \code{dhyper} gives the density,
40  \code{phyper} gives the distribution function,
41  \code{qhyper} gives the quantile function, and
42  \code{rhyper} generates random deviates.
43
44  Invalid arguments will result in return value \code{NaN}, with a warning.
45
46  The length of the result is determined by \code{nn} for
47  \code{rhyper}, and is the maximum of the lengths of the
48  numerical arguments for the other functions.
49
50  The numerical arguments other than \code{nn} are recycled to the
51  length of the result.  Only the first elements of the logical
52  arguments are used.
53}
54\details{
55  The hypergeometric distribution is used for sampling \emph{without}
56  replacement.  The density of this distribution with parameters
57  \code{m}, \code{n} and \code{k} (named \eqn{Np}, \eqn{N-Np}, and
58  \eqn{n}, respectively in the reference below, where \eqn{N := m+n} is also used
59  in other references) is given by
60  \deqn{
61    p(x) = \left. {m \choose x}{n \choose k-x} \right/ {m+n \choose k}%
62  }{p(x) =      choose(m, x) choose(n, k-x) / choose(m+n, k)}
63  for \eqn{x = 0, \ldots, k}.
64
65  Note that \eqn{p(x)} is non-zero only for
66  \eqn{\max(0, k-n) \le x \le \min(k, m)}{max(0, k-n) <= x <= min(k, m)}.
67
68  With \eqn{p := m/(m+n)} (hence \eqn{Np = N \times p} in the
69  reference's notation), the first two moments are mean
70  \deqn{E[X] = \mu = k p} and variance
71  \deqn{\mbox{Var}(X) = k p (1 - p) \frac{m+n-k}{m+n-1},}{%
72               Var(X) = k p (1 - p) * (m+n-k)/(m+n-1),}
73  which shows the closeness to the Binomial\eqn{(k,p)} (where the
74  hypergeometric has smaller variance unless \eqn{k = 1}).
75
76  The quantile is defined as the smallest value \eqn{x} such that
77  \eqn{F(x) \ge p}, where \eqn{F} is the distribution function.
78
79  In \code{rhyper()}, if one of \eqn{m, n, k} exceeds \code{\link{.Machine}$integer.max},
80  currently the equivalent of \code{qhyper(runif(nn), m,n,k)} is used,
81  which is comparatively slow; a binomial approximation may be
82  considerably more efficient in that case.
83}
84\source{
85  \code{dhyper} computes via binomial probabilities, using code
86  contributed by Catherine Loader (see \code{\link{dbinom}}).
87
88  \code{phyper} is based on calculating \code{dhyper} and
89  \code{phyper(...)/dhyper(...)} (as a summation), based on ideas of Ian
90  Smith and Morten Welinder.
91
92  \code{qhyper} is based on inversion (of an earlier \code{phyper()} algorithm).
93
94  \code{rhyper} is based on a corrected version of
95
96  Kachitvichyanukul, V. and Schmeiser, B. (1985).
97  Computer generation of hypergeometric random variates.
98  \emph{Journal of Statistical Computation and Simulation},
99  \bold{22}, 127--145.
100}
101\references{
102  Johnson, N. L., Kotz, S., and Kemp, A. W. (1992)
103  \emph{Univariate Discrete Distributions},
104  Second Edition. New York: Wiley.
105}
106\seealso{
107  \link{Distributions} for other standard distributions.
108}
109\examples{
110m <- 10; n <- 7; k <- 8
111x <- 0:(k+1)
112rbind(phyper(x, m, n, k), dhyper(x, m, n, k))
113all(phyper(x, m, n, k) == cumsum(dhyper(x, m, n, k)))  # FALSE
114\donttest{## but error is very small:
115signif(phyper(x, m, n, k) - cumsum(dhyper(x, m, n, k)), digits = 3)
116}}
117\keyword{distribution}
118