1# File src/library/stats/R/ansari.test.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2020 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19ansari.test <- function(x, ...) UseMethod("ansari.test") 20 21ansari.test.default <- 22function(x, y, alternative = c("two.sided", "less", "greater"), 23 exact = NULL, conf.int = FALSE, conf.level = 0.95, ...) 24{ 25 alternative <- match.arg(alternative) 26 if(conf.int) { 27 if(!((length(conf.level) == 1L) 28 && is.finite(conf.level) 29 && (conf.level > 0) 30 && (conf.level < 1))) 31 stop("'conf.level' must be a single number between 0 and 1") 32 } 33 DNAME <- paste(deparse1(substitute(x)), "and", deparse1(substitute(y))) 34 35 x <- x[complete.cases(x)] 36 y <- y[complete.cases(y)] 37 m <- as.integer(length(x)) 38 if(is.na(m) || m < 1L) 39 stop("not enough 'x' observations") 40 n <- as.integer(length(y)) 41 if(is.na(n) || n < 1L) 42 stop("not enough 'y' observations") 43 N <- m + n 44 45 r <- rank(c(x, y)) 46 STATISTIC <- sum(pmin(r, N - r + 1)[seq_along(x)]) 47 TIES <- (length(r) != length(unique(r))) 48 49 if(is.null(exact)) 50 exact <- ((m < 50L) && (n < 50L)) 51 52 if(exact && !TIES) { 53 pansari <- function(q, m, n) .Call(C_pAnsari, q, m, n) 54 PVAL <- 55 switch(alternative, 56 two.sided = { 57 if (STATISTIC > ((m + 1)^2 %/% 4 58 + ((m * n) %/% 2) / 2)) 59 p <- 1 - pansari(STATISTIC - 1, m, n) 60 else 61 p <- pansari(STATISTIC, m, n) 62 min(2 * p, 1) 63 }, 64 less = 1 - pansari(STATISTIC - 1, m, n), 65 greater = pansari(STATISTIC, m, n)) 66 if (conf.int) { 67 qansari <- function(p, m, n) .Call(C_qAnsari, p, m, n) 68 alpha <- 1 - conf.level 69 x <- sort(x) 70 y <- sort(y) 71 ab <- function(sig) { 72 rab <- rank(c(x/sig, y)) 73 sum(pmin(rab, N - rab + 1)[seq_along(x)]) 74 } 75 ratio <- outer(x, y, "/") 76 aratio <- ratio[ratio >= 0] 77 sigma <- sort(aratio) 78 79 cci <- function(alpha) { 80 u <- absigma - qansari(alpha/2, m, n) 81 l <- absigma - qansari(1 - alpha/2, m, n) 82 if(length(u[u >= 0]) == 0L || length(l[l > 0]) == 0L) { 83 warning("samples differ in location: cannot compute confidence set, returning NA") 84 return(c(NA, NA)) 85 } 86 ## Check if the statistic exceeds both quantiles first. 87 ## uci <- NULL 88 ## lci <- NULL 89 ## if (is.null(uci)) { 90 u[u < 0] <- NA 91 uci <- min(sigma[which(u == min(u, na.rm = TRUE))]) 92 ## } 93 ## if (is.null(lci)) { 94 l[l <= 0] <- NA 95 lci <- max(sigma[which(l == min(l, na.rm = TRUE))]) 96 ## } 97 ## The process of the statistics does not need to be 98 ## monotone in sigma: check this and interchange quantiles. 99 if (uci > lci) { 100 l <- absigma - qansari(alpha/2, m, n) 101 u <- absigma - qansari(1 - alpha/2, m, n) 102 u[u < 0] <- NA 103 uci <- min(sigma[which(u == min(u, na.rm = TRUE))]) 104 l[l <= 0] <- NA 105 lci <- max(sigma[which(l == min(l, na.rm = TRUE))]) 106 } 107 c(uci, lci) 108 } 109 110 cint <- if(length(sigma) < 1L) { 111 warning("cannot compute confidence set, returning NA") 112 c(NA, NA) 113 } 114 else { 115 ## Compute statistics directly: computing the steps is 116 ## not faster. 117 absigma <- 118 sapply(sigma + c(diff(sigma)/2, 119 sigma[length(sigma)]*1.01), ab) 120 switch(alternative, two.sided = cci(alpha), 121 greater = c(cci(alpha*2)[1L], Inf), 122 less = c(0, cci(alpha*2)[2L])) 123 } 124 attr(cint, "conf.level") <- conf.level 125 u <- absigma - qansari(0.5, m, n) 126 sgr <- if(length(sgr <- sigma[u <= 0]) == 0L) NA else max(sgr) 127 sle <- if(length(sle <- sigma[u > 0]) == 0L) NA else min(sle) 128 ESTIMATE <- mean(c(sle, sgr)) 129 } 130 } 131 else { 132 EVEN <- ((N %% 2L) == 0L) 133 normalize <- function(s, r, TIES, m=length(x), n=length(y)) { 134 z <- if(EVEN) 135 s - m * (N + 2)/4 136 else 137 s - m * (N + 1)^2/(4 * N) 138 if (!TIES) { 139 SIGMA <- if(EVEN) 140 sqrt((m * n * (N + 2) * (N - 2))/(48 * (N - 1))) 141 else 142 sqrt((m * n * (N + 1) * (3 + N^2))/(48 * N^2)) 143 } 144 else { 145 r <- rle(sort(pmin(r, N - r + 1))) 146 SIGMA <- if(EVEN) 147 sqrt(m * n 148 * (16 * sum(r$lengths * r$values^2) - N * (N + 2)^2) 149 / (16 * N * (N - 1))) 150 else 151 sqrt(m * n 152 * (16 * N * sum(r$lengths * r$values^2) - (N + 1)^4) 153 / (16 * N^2 * (N - 1))) 154 } 155 z / SIGMA 156 } 157 p <- pnorm(normalize(STATISTIC, r, TIES)) 158 PVAL <- switch(alternative, 159 two.sided = 2 * min(p, 1 - p), 160 less = 1 - p, 161 greater = p) 162 163 if(conf.int && !exact) { 164 alpha <- 1 - conf.level 165 ab2 <- function(sig, zq) { 166 r <- rank(c(x / sig, y)) 167 s <- sum(pmin(r, N - r + 1)[seq_along(x)]) 168 TIES <- (length(r) != length(unique(r))) 169 normalize(s, r, TIES, length(x), length(y)) - zq 170 } 171 ## Use uniroot here. 172 ## Compute the range of sigma first. 173 srangepos <- NULL 174 srangeneg <- NULL 175 if (length(x[x > 0]) && length(y[y > 0])) 176 srangepos <- 177 c(min(x[x>0], na.rm=TRUE)/max(y[y>0], na.rm=TRUE), 178 max(x[x>0], na.rm=TRUE)/min(y[y>0], na.rm=TRUE)) 179 if (length(x[x <= 0]) && length(y[y < 0])) 180 srangeneg <- 181 c(min(x[x<=0], na.rm=TRUE)/max(y[y<0], na.rm=TRUE), 182 max(x[x<=0], na.rm=TRUE)/min(y[y<0], na.rm=TRUE)) 183 if (any(is.infinite(c(srangepos, srangeneg)))) { 184 warning("cannot compute asymptotic confidence set or estimator") 185 conf.int <- FALSE 186 } else { 187 ccia <- function(alpha) { 188 ## Check if the statistic exceeds both quantiles 189 ## first. 190 statu <- ab2(srange[1L], zq=qnorm(alpha/2)) 191 statl <- ab2(srange[2L], zq=qnorm(alpha/2, lower.tail=FALSE)) 192 if (statu > 0 || statl < 0) { 193 warning("samples differ in location: cannot compute confidence set, returning NA") 194 return(c(NA, NA)) 195 } 196 u <- uniroot(ab2, srange, tol=1e-4, 197 zq=qnorm(alpha/2))$root 198 l <- uniroot(ab2, srange, tol=1e-4, 199 zq=qnorm(alpha/2, lower.tail=FALSE))$root 200 ## The process of the statistics does not need to be 201 ## monotone: sort is ok here. 202 sort(c(u, l)) 203 } 204 srange <- range(c(srangepos, srangeneg), na.rm=FALSE) 205 cint <- switch(alternative, 206 two.sided = ccia(alpha), 207 greater = c(ccia(alpha*2)[1L], Inf), 208 less = c(0, ccia(alpha*2)[2L]) ) 209 attr(cint, "conf.level") <- conf.level 210 ## Check if the statistic exceeds both quantiles first. 211 statu <- ab2(srange[1L], zq=0) 212 statl <- ab2(srange[2L], zq=0) 213 if (statu > 0 || statl < 0) { 214 ESTIMATE <- NA 215 warning("cannot compute estimate, returning NA") 216 } else 217 ESTIMATE <- uniroot(ab2, srange, tol=1e-4, zq=0)$root 218 } 219 } 220 if(exact && TIES) { 221 warning("cannot compute exact p-value with ties") 222 if(conf.int) 223 warning("cannot compute exact confidence intervals with ties") 224 } 225 } 226 227 names(STATISTIC) <- "AB" 228 RVAL <- list(statistic = STATISTIC, 229 p.value = PVAL, 230 null.value = c("ratio of scales" = 1), 231 alternative = alternative, 232 method = "Ansari-Bradley test", 233 data.name = DNAME) 234 if(conf.int) 235 RVAL <- c(RVAL, 236 list(conf.int = cint, 237 estimate = c("ratio of scales" = ESTIMATE))) 238 class(RVAL) <- "htest" 239 return(RVAL) 240} 241 242ansari.test.formula <- 243function(formula, data, subset, na.action, ...) 244{ 245 if(missing(formula) 246 || (length(formula) != 3L) 247 || (length(attr(terms(formula[-2L]), "term.labels")) != 1L)) 248 stop("'formula' missing or incorrect") 249 m <- match.call(expand.dots = FALSE) 250 if(is.matrix(eval(m$data, parent.frame()))) 251 m$data <- as.data.frame(data) 252 ## need stats:: for non-standard evaluation 253 m[[1L]] <- quote(stats::model.frame) 254 m$... <- NULL 255 mf <- eval(m, parent.frame()) 256 DNAME <- paste(names(mf), collapse = " by ") 257 names(mf) <- NULL 258 response <- attr(attr(mf, "terms"), "response") 259 g <- factor(mf[[-response]]) 260 if(nlevels(g) != 2L) 261 stop("grouping factor must have exactly 2 levels") 262 DATA <- setNames(split(mf[[response]], g), c("x", "y")) 263 y <- do.call("ansari.test", c(DATA, list(...))) 264 y$data.name <- DNAME 265 y 266} 267