1#  File src/library/stats/R/ansari.test.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2020 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19ansari.test <- function(x, ...) UseMethod("ansari.test")
20
21ansari.test.default <-
22function(x, y, alternative = c("two.sided", "less", "greater"),
23         exact = NULL, conf.int = FALSE, conf.level = 0.95, ...)
24{
25    alternative <- match.arg(alternative)
26    if(conf.int) {
27        if(!((length(conf.level) == 1L)
28             && is.finite(conf.level)
29             && (conf.level > 0)
30             && (conf.level < 1)))
31            stop("'conf.level' must be a single number between 0 and 1")
32    }
33    DNAME <- paste(deparse1(substitute(x)), "and", deparse1(substitute(y)))
34
35    x <- x[complete.cases(x)]
36    y <- y[complete.cases(y)]
37    m <- as.integer(length(x))
38    if(is.na(m) || m < 1L)
39        stop("not enough 'x' observations")
40    n <- as.integer(length(y))
41    if(is.na(n) || n < 1L)
42        stop("not enough 'y' observations")
43    N <- m + n
44
45    r <- rank(c(x, y))
46    STATISTIC <- sum(pmin(r, N - r + 1)[seq_along(x)])
47    TIES <- (length(r) != length(unique(r)))
48
49    if(is.null(exact))
50        exact <- ((m < 50L) && (n < 50L))
51
52    if(exact && !TIES) {
53        pansari <- function(q, m, n) .Call(C_pAnsari, q, m, n)
54        PVAL <-
55            switch(alternative,
56                   two.sided = {
57                       if (STATISTIC > ((m + 1)^2 %/% 4
58                                        + ((m * n) %/% 2) / 2))
59                           p <- 1 - pansari(STATISTIC - 1, m, n)
60                       else
61                           p <- pansari(STATISTIC, m, n)
62                       min(2 * p, 1)
63                   },
64                   less = 1 - pansari(STATISTIC - 1, m, n),
65                   greater = pansari(STATISTIC, m, n))
66        if (conf.int) {
67            qansari <- function(p, m, n) .Call(C_qAnsari, p, m, n)
68            alpha <- 1 - conf.level
69            x <- sort(x)
70            y <- sort(y)
71            ab <- function(sig) {
72                rab <- rank(c(x/sig, y))
73                sum(pmin(rab, N - rab + 1)[seq_along(x)])
74            }
75            ratio <- outer(x, y, "/")
76            aratio <- ratio[ratio >= 0]
77            sigma <- sort(aratio)
78
79            cci <- function(alpha) {
80              u <- absigma - qansari(alpha/2,  m, n)
81              l <- absigma - qansari(1 - alpha/2, m, n)
82              if(length(u[u >= 0]) == 0L || length(l[l > 0]) == 0L) {
83                  warning("samples differ in location: cannot compute confidence set, returning NA")
84                  return(c(NA, NA))
85              }
86              ## Check if the statistic exceeds both quantiles first.
87              ## uci <- NULL
88              ## lci <- NULL
89              ## if (is.null(uci)) {
90                  u[u < 0] <- NA
91                  uci <- min(sigma[which(u == min(u, na.rm = TRUE))])
92              ## }
93              ## if (is.null(lci)) {
94                  l[l <= 0] <- NA
95                  lci <- max(sigma[which(l == min(l, na.rm = TRUE))])
96              ## }
97              ## The process of the statistics does not need to be
98              ## monotone in sigma: check this and interchange quantiles.
99              if (uci > lci) {
100                  l <- absigma - qansari(alpha/2,  m, n)
101                  u <- absigma - qansari(1 - alpha/2, m, n)
102                  u[u < 0] <- NA
103                  uci <- min(sigma[which(u == min(u, na.rm = TRUE))])
104                  l[l <= 0] <- NA
105                  lci <- max(sigma[which(l == min(l, na.rm = TRUE))])
106               }
107               c(uci, lci)
108            }
109
110            cint <- if(length(sigma) < 1L) {
111                warning("cannot compute confidence set, returning NA")
112                c(NA, NA)
113            }
114            else {
115                ## Compute statistics directly: computing the steps is
116                ## not faster.
117                absigma <-
118                    sapply(sigma + c(diff(sigma)/2,
119                                     sigma[length(sigma)]*1.01), ab)
120                switch(alternative, two.sided = cci(alpha),
121                       greater = c(cci(alpha*2)[1L], Inf),
122                       less    = c(0, cci(alpha*2)[2L]))
123            }
124            attr(cint, "conf.level") <- conf.level
125            u <- absigma - qansari(0.5, m, n)
126            sgr <- if(length(sgr <- sigma[u <= 0]) == 0L) NA else max(sgr)
127            sle <- if(length(sle <- sigma[u  > 0]) == 0L) NA else min(sle)
128            ESTIMATE <- mean(c(sle, sgr))
129        }
130    }
131    else {
132        EVEN <- ((N %% 2L) == 0L)
133        normalize <- function(s, r, TIES, m=length(x), n=length(y)) {
134            z <- if(EVEN)
135                s - m * (N + 2)/4
136            else
137                s - m * (N + 1)^2/(4 * N)
138            if (!TIES) {
139                SIGMA <- if(EVEN)
140                    sqrt((m * n * (N + 2) * (N - 2))/(48 * (N - 1)))
141                else
142                    sqrt((m * n * (N + 1) * (3 + N^2))/(48 * N^2))
143            }
144            else {
145                r <- rle(sort(pmin(r, N - r + 1)))
146                SIGMA <- if(EVEN)
147                    sqrt(m * n
148                         * (16 * sum(r$lengths * r$values^2) - N * (N + 2)^2)
149                         / (16 * N * (N - 1)))
150                else
151                    sqrt(m * n
152                         * (16 * N * sum(r$lengths * r$values^2) - (N + 1)^4)
153                         / (16 * N^2 * (N - 1)))
154            }
155            z / SIGMA
156        }
157        p <- pnorm(normalize(STATISTIC, r, TIES))
158        PVAL <- switch(alternative,
159                       two.sided = 2 * min(p, 1 - p),
160                       less = 1 - p,
161                       greater = p)
162
163        if(conf.int && !exact) {
164            alpha <- 1 - conf.level
165            ab2 <- function(sig, zq) {
166                r <- rank(c(x / sig, y))
167                s <- sum(pmin(r, N - r + 1)[seq_along(x)])
168                TIES <- (length(r) != length(unique(r)))
169                normalize(s, r, TIES, length(x), length(y)) - zq
170            }
171            ## Use uniroot here.
172            ## Compute the range of sigma first.
173            srangepos <- NULL
174            srangeneg <- NULL
175            if (length(x[x > 0]) && length(y[y > 0]))
176                srangepos <-
177                    c(min(x[x>0], na.rm=TRUE)/max(y[y>0], na.rm=TRUE),
178                      max(x[x>0], na.rm=TRUE)/min(y[y>0], na.rm=TRUE))
179            if (length(x[x <= 0]) && length(y[y < 0]))
180                srangeneg <-
181                    c(min(x[x<=0], na.rm=TRUE)/max(y[y<0], na.rm=TRUE),
182                      max(x[x<=0], na.rm=TRUE)/min(y[y<0], na.rm=TRUE))
183            if (any(is.infinite(c(srangepos, srangeneg)))) {
184                warning("cannot compute asymptotic confidence set or estimator")
185                conf.int <- FALSE
186            } else {
187                ccia <- function(alpha) {
188                    ## Check if the statistic exceeds both quantiles
189                    ## first.
190                    statu <- ab2(srange[1L], zq=qnorm(alpha/2))
191                    statl <- ab2(srange[2L], zq=qnorm(alpha/2, lower.tail=FALSE))
192                    if (statu > 0 || statl < 0) {
193                        warning("samples differ in location: cannot compute confidence set, returning NA")
194                        return(c(NA, NA))
195                    }
196                    u <- uniroot(ab2, srange, tol=1e-4,
197                                 zq=qnorm(alpha/2))$root
198                    l <- uniroot(ab2, srange, tol=1e-4,
199                                 zq=qnorm(alpha/2, lower.tail=FALSE))$root
200                    ## The process of the statistics does not need to be
201                    ## monotone: sort is ok here.
202                    sort(c(u, l))
203                }
204                srange <- range(c(srangepos, srangeneg), na.rm=FALSE)
205                cint <- switch(alternative,
206                               two.sided = ccia(alpha),
207                               greater = c(ccia(alpha*2)[1L], Inf),
208                               less = c(0, ccia(alpha*2)[2L]) )
209                attr(cint, "conf.level") <- conf.level
210                ## Check if the statistic exceeds both quantiles first.
211                statu <- ab2(srange[1L], zq=0)
212                statl <- ab2(srange[2L], zq=0)
213                if (statu > 0 || statl < 0) {
214                    ESTIMATE <- NA
215                    warning("cannot compute estimate, returning NA")
216                } else
217                    ESTIMATE <- uniroot(ab2, srange, tol=1e-4, zq=0)$root
218            }
219        }
220        if(exact && TIES) {
221            warning("cannot compute exact p-value with ties")
222            if(conf.int)
223                warning("cannot compute exact confidence intervals with ties")
224        }
225    }
226
227    names(STATISTIC) <- "AB"
228    RVAL <- list(statistic = STATISTIC,
229                 p.value = PVAL,
230                 null.value = c("ratio of scales" = 1),
231                 alternative = alternative,
232                 method = "Ansari-Bradley test",
233                 data.name = DNAME)
234    if(conf.int)
235        RVAL <- c(RVAL,
236                  list(conf.int = cint,
237                       estimate = c("ratio of scales" = ESTIMATE)))
238    class(RVAL) <- "htest"
239    return(RVAL)
240}
241
242ansari.test.formula <-
243function(formula, data, subset, na.action, ...)
244{
245    if(missing(formula)
246       || (length(formula) != 3L)
247       || (length(attr(terms(formula[-2L]), "term.labels")) != 1L))
248        stop("'formula' missing or incorrect")
249    m <- match.call(expand.dots = FALSE)
250    if(is.matrix(eval(m$data, parent.frame())))
251        m$data <- as.data.frame(data)
252    ## need stats:: for non-standard evaluation
253    m[[1L]] <- quote(stats::model.frame)
254    m$... <- NULL
255    mf <- eval(m, parent.frame())
256    DNAME <- paste(names(mf), collapse = " by ")
257    names(mf) <- NULL
258    response <- attr(attr(mf, "terms"), "response")
259    g <- factor(mf[[-response]])
260    if(nlevels(g) != 2L)
261        stop("grouping factor must have exactly 2 levels")
262    DATA <- setNames(split(mf[[response]], g), c("x", "y"))
263    y <- do.call("ansari.test", c(DATA, list(...)))
264    y$data.name <- DNAME
265    y
266}
267