% File src/library/stats/man/ks.test.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2020 R Core Team
% Distributed under GPL 2 or later

\name{ks.test}
\alias{ks.test}
\title{Kolmogorov-Smirnov Tests}
\description{
  Perform a one- or two-sample Kolmogorov-Smirnov test.
}
\usage{
ks.test(x, y, \dots,
        alternative = c("two.sided", "less", "greater"),
        exact = NULL)
}
\arguments{
  \item{x}{a numeric vector of data values.}
  \item{y}{either a numeric vector of data values, or a character string
    naming a cumulative distribution function or an actual cumulative
    distribution function such as \code{pnorm}.  Only continuous CDFs
    are valid.}
  \item{\dots}{parameters of the distribution specified (as a character
    string) by \code{y}.}
  \item{alternative}{indicates the alternative hypothesis and must be
    one of \code{"two.sided"} (default), \code{"less"}, or
    \code{"greater"}.  You can specify just the initial letter of the
    value, but the argument name must be given in full.
    See \sQuote{Details} for the meanings of the possible values.}
  \item{exact}{\code{NULL} or a logical indicating whether an exact
    p-value should be computed.  See \sQuote{Details} for the meaning of
    \code{NULL}.  Not available in the two-sample case for a one-sided
    test or if ties are present.}
}
\details{
  If \code{y} is numeric, a two-sample test of the null hypothesis
  that \code{x} and \code{y} were drawn from the same \emph{continuous}
  distribution is performed.

  Alternatively, \code{y} can be a character string naming a continuous
  (cumulative) distribution function, or such a function.  In this case,
  a one-sample test is carried out of the null that the distribution
  function which generated \code{x} is distribution \code{y} with
  parameters specified by \code{\dots}.

  The presence of ties always generates a warning, since continuous
  distributions do not generate them.
  If the ties arose from rounding
  the tests may be approximately valid, but even modest amounts of
  rounding can have a significant effect on the calculated statistic.

  Missing values are silently omitted from \code{x} and (in the
  two-sample case) \code{y}.

  The possible values \code{"two.sided"}, \code{"less"} and
  \code{"greater"} of \code{alternative} specify the null hypothesis
  that the true distribution function of \code{x} is equal to, not less
  than or not greater than the hypothesized distribution function
  (one-sample case) or the distribution function of \code{y} (two-sample
  case), respectively.  This is a comparison of cumulative distribution
  functions, and the test statistic is the maximum difference in value,
  with the statistic in the \code{"greater"} alternative being
  \eqn{D^+ = \max_u [ F_x(u) - F_y(u) ]}{D^+ = max[F_x(u) - F_y(u)]}.
  Thus in the two-sample case \code{alternative = "greater"} includes
  distributions for which \code{x} is stochastically \emph{smaller} than
  \code{y} (the CDF of \code{x} lies above and hence to the left of that
  for \code{y}), in contrast to \code{\link{t.test}} or
  \code{\link{wilcox.test}}.

  Exact p-values are not available for the two-sample case if one-sided
  or in the presence of ties.  If \code{exact = NULL} (the default), an
  exact p-value is computed if the sample size is less than 100 in the
  one-sample case \emph{and there are no ties}, and if the product of
  the sample sizes is less than 10000 in the two-sample case.
  Otherwise, asymptotic distributions are used whose approximations may
  be inaccurate in small samples.  In the one-sample two-sided case,
  exact p-values are obtained as described in Marsaglia, Tsang & Wang
  (2003) (but not using the optional approximation in the right tail, so
  this can be slow for small p-values).  The formula of Birnbaum &
  Tingey (1951) is used for the one-sample one-sided case.

  If a single-sample test is used, the parameters specified in
  \code{\dots} must be pre-specified and not estimated from the data.
  There is some more refined distribution theory for the KS test with
  estimated parameters (see Durbin, 1973), but that is not implemented
  in \code{ks.test}.
}
\value{
  A list with class \code{"htest"} containing the following components:
  \item{statistic}{the value of the test statistic.}
  \item{p.value}{the p-value of the test.}
  \item{alternative}{a character string describing the alternative
    hypothesis.}
  \item{method}{a character string indicating what type of test was
    performed.}
  \item{data.name}{a character string giving the name(s) of the data.}
}
\source{
  The two-sided one-sample distribution comes \emph{via}
  Marsaglia, Tsang and Wang (2003).
}
\references{
  Z. W. Birnbaum and Fred H. Tingey (1951).
  One-sided confidence contours for probability distribution functions.
  \emph{The Annals of Mathematical Statistics}, \bold{22}/4, 592--596.
  \doi{10.1214/aoms/1177729550}.

  William J. Conover (1971).
  \emph{Practical Nonparametric Statistics}.
  New York: John Wiley & Sons.
  Pages 295--301 (one-sample Kolmogorov test),
  309--314 (two-sample Smirnov test).

  Durbin, J. (1973).
  \emph{Distribution theory for tests based on the sample distribution
    function}.
  SIAM.

  George Marsaglia, Wai Wan Tsang and Jingbo Wang (2003).
  Evaluating Kolmogorov's distribution.
  \emph{Journal of Statistical Software}, \bold{8}/18.
  \doi{10.18637/jss.v008.i18}.
}
\seealso{
  \code{\link{shapiro.test}} which performs the Shapiro-Wilk test for
  normality.
}
\examples{
require(graphics)

x <- rnorm(50)
y <- runif(30)
# Do x and y come from the same distribution?
ks.test(x, y)
# Does x come from a shifted gamma distribution with shape 3 and rate 2?
ks.test(x+2, "pgamma", 3, 2) # two-sided, exact
ks.test(x+2, "pgamma", 3, 2, exact = FALSE)
ks.test(x+2, "pgamma", 3, 2, alternative = "gr")

# test if x is stochastically larger than x2
x2 <- rnorm(50, -1)
plot(ecdf(x), xlim = range(c(x, x2)))
plot(ecdf(x2), add = TRUE, lty = "dashed")
t.test(x, x2, alternative = "g")
wilcox.test(x, x2, alternative = "g")
ks.test(x, x2, alternative = "l")
}
\keyword{htest}