% File src/library/stats/man/ks.test.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2020 R Core Team
% Distributed under GPL 2 or later

\name{ks.test}
\alias{ks.test}
\title{Kolmogorov-Smirnov Tests}
\description{
  Perform a one- or two-sample Kolmogorov-Smirnov test.
}
\usage{
ks.test(x, y, \dots,
        alternative = c("two.sided", "less", "greater"),
        exact = NULL)
}
\arguments{
  \item{x}{a numeric vector of data values.}
  \item{y}{either a numeric vector of data values, or a character string
    naming a cumulative distribution function or an actual cumulative
    distribution function such as \code{pnorm}.  Only continuous CDFs
    are valid.}
  \item{\dots}{parameters of the distribution specified (as a character
    string) by \code{y}.}
  \item{alternative}{indicates the alternative hypothesis and must be
    one of \code{"two.sided"} (default), \code{"less"}, or
    \code{"greater"}.  You can specify just the initial letter of the
    value, but the argument name must be given in full.
    See \sQuote{Details} for the meanings of the possible values.}
  \item{exact}{\code{NULL} or a logical indicating whether an exact
    p-value should be computed.  See \sQuote{Details} for the meaning of
    \code{NULL}.  Not available in the two-sample case for a one-sided
    test or if ties are present.}
}
\details{
  If \code{y} is numeric, a two-sample test of the null hypothesis
  that \code{x} and \code{y} were drawn from the same \emph{continuous}
  distribution is performed.
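
  For example, with two arbitrary simulated samples:
\preformatted{
  ks.test(rnorm(50), rnorm(30, mean = 1))  # compare two independent samples
}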

  Alternatively, \code{y} can be a character string naming a continuous
  (cumulative) distribution function, or such a function.  In this case,
  a one-sample test is carried out of the null hypothesis that the
  distribution function which generated \code{x} is \code{y} with the
  parameters specified by \code{\dots}.
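
  For illustration, both ways of specifying the hypothesized
  distribution can be used; the exponential sample and rate parameter
  below are arbitrary choices:
\preformatted{
  z <- rexp(80, rate = 0.5)
  # name the CDF as a character string; the remaining arguments are the
  # parameters of that distribution
  ks.test(z, "pexp", rate = 0.5)
  # or pass the CDF itself as a function
  ks.test(z, pexp, rate = 0.5)
}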

  The presence of ties always generates a warning, since continuous
  distributions do not generate them.  If the ties arose from rounding,
  the tests may be approximately valid, but even modest amounts of
  rounding can have a significant effect on the calculated statistic.
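
  As a sketch of this effect (the sample below is arbitrary), data
  recorded to one decimal place will typically contain ties and provoke
  the warning:
\preformatted{
  zr <- round(rnorm(100), 1)  # rounding introduces ties
  ks.test(zr, "pnorm")        # warns that ties should not be present
}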

  Missing values are silently omitted from \code{x} and (in the
  two-sample case) \code{y}.

  The possible values \code{"two.sided"}, \code{"less"} and
  \code{"greater"} of \code{alternative} specify the null hypothesis
  that the true distribution function of \code{x} is equal to, not less
  than, or not greater than the hypothesized distribution function
  (one-sample case) or the distribution function of \code{y} (two-sample
  case), respectively.  This is a comparison of cumulative distribution
  functions, and the test statistic is the maximum difference in value,
  with the statistic in the \code{"greater"} alternative being
  \eqn{D^+ = \max_u [ F_x(u) - F_y(u) ]}{D^+ = max[F_x(u) - F_y(u)]}.
  Thus in the two-sample case \code{alternative = "greater"} includes
  distributions for which \code{x} is stochastically \emph{smaller} than
  \code{y} (the CDF of \code{x} lies above and hence to the left of that
  for \code{y}), in contrast to \code{\link{t.test}} or
  \code{\link{wilcox.test}}.
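
  As a sketch of this definition (using arbitrary simulated samples),
  the statistic reported for \code{alternative = "greater"} can be
  reproduced directly from the two empirical distribution functions:
\preformatted{
  x1 <- rnorm(50)
  y1 <- rnorm(60, mean = 0.5)
  u  <- sort(c(x1, y1))
  # D^+ is the largest difference between the empirical CDFs, evaluated
  # at the pooled sample points
  Dplus <- max(ecdf(x1)(u) - ecdf(y1)(u))
  # agrees, up to floating-point error, with the reported statistic
  ks.test(x1, y1, alternative = "greater")$statistic
}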

  Exact p-values are not available in the two-sample case for a
  one-sided test or in the presence of ties.  If \code{exact = NULL}
  (the default), an exact p-value is computed if the sample size is less
  than 100 in the one-sample case \emph{and there are no ties}, and if
  the product of the sample sizes is less than 10000 in the two-sample
  case.  Otherwise, asymptotic distributions are used whose
  approximations may be inaccurate in small samples.  In the one-sample
  two-sided case, exact p-values are obtained as described in Marsaglia,
  Tsang & Wang (2003) (but not using the optional approximation in the
  right tail, so this can be slow for small p-values).  The formula of
  Birnbaum & Tingey (1951) is used for the one-sample one-sided case.
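
  As an illustration of the default choice (sample size and
  distribution below are arbitrary), a small tie-free one-sample test
  uses the exact distribution unless this is switched off:
\preformatted{
  z <- rnorm(40)
  ks.test(z, "pnorm")$p.value                 # exact: n < 100, no ties
  ks.test(z, "pnorm", exact = FALSE)$p.value  # asymptotic approximation
}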

  If a one-sample test is used, the parameters specified in
  \code{\dots} must be pre-specified and not estimated from the data.
  There is some more refined distribution theory for the KS test with
  estimated parameters (see Durbin, 1973), but that is not implemented
  in \code{ks.test}.
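
  As a sketch of this distinction (the data below are arbitrary), the
  second call is \emph{not} a valid use of \code{ks.test}, because its
  parameters are estimated from the very sample being tested, so the
  null distribution assumed by the test no longer applies:
\preformatted{
  z <- rnorm(75, mean = 10, sd = 2)
  ks.test(z, "pnorm", 10, 2)           # valid: parameters fixed in advance
  ks.test(z, "pnorm", mean(z), sd(z))  # invalid: parameters estimated from z
}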
}
\value{
  A list with class \code{"htest"} containing the following components:
  \item{statistic}{the value of the test statistic.}
  \item{p.value}{the p-value of the test.}
  \item{alternative}{a character string describing the alternative
    hypothesis.}
  \item{method}{a character string indicating what type of test was
    performed.}
  \item{data.name}{a character string giving the name(s) of the data.}
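
  For instance, with an arbitrary simulated sample, individual
  components can be extracted from the returned object in the usual
  way:
\preformatted{
  res <- ks.test(rnorm(50), "pnorm")
  res$statistic  # the value of D
  res$p.value
}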
}
\source{
  The two-sided one-sample distribution comes \emph{via}
  Marsaglia, Tsang and Wang (2003).
}
\references{
  Z. W. Birnbaum and Fred H. Tingey (1951).
  One-sided confidence contours for probability distribution functions.
  \emph{The Annals of Mathematical Statistics}, \bold{22}/4, 592--596.
  \doi{10.1214/aoms/1177729550}.

  William J. Conover (1971).
  \emph{Practical Nonparametric Statistics}.
  New York: John Wiley & Sons.
  Pages 295--301 (one-sample Kolmogorov test),
  309--314 (two-sample Smirnov test).

  J. Durbin (1973).
  \emph{Distribution theory for tests based on the sample distribution
    function}.
  SIAM.

  George Marsaglia, Wai Wan Tsang and Jingbo Wang (2003).
  Evaluating Kolmogorov's distribution.
  \emph{Journal of Statistical Software}, \bold{8}/18.
  \doi{10.18637/jss.v008.i18}.
}
\seealso{
  \code{\link{shapiro.test}} which performs the Shapiro-Wilk test for
  normality.
}
\examples{
require(graphics)

x <- rnorm(50)
y <- runif(30)
# Do x and y come from the same distribution?
ks.test(x, y)
# Does x come from a shifted gamma distribution with shape 3 and rate 2?
ks.test(x+2, "pgamma", 3, 2) # two-sided, exact
ks.test(x+2, "pgamma", 3, 2, exact = FALSE)
ks.test(x+2, "pgamma", 3, 2, alternative = "gr")

# test if x is stochastically larger than x2
x2 <- rnorm(50, -1)
plot(ecdf(x), xlim = range(c(x, x2)))
plot(ecdf(x2), add = TRUE, lty = "dashed")
t.test(x, x2, alternative = "g")
wilcox.test(x, x2, alternative = "g")
ks.test(x, x2, alternative = "l")
}
\keyword{htest}