1# Copyright 2001-6 by Roger Bivand, bugfix large n Ronnie Babigumira 2# 3 4joincount <- function(dums, listw) { 5 nc <- which(colSums(dums) > 1) 6# n <- length(listw$neighbours) 7 cardnb <- card(listw$neighbours) 8 res <- as.numeric(rep(0, ncol(dums))) 9 for (lev in nc) { 10 res[lev] <- .Call("jcintern", listw$neighbours, 11 listw$weights, as.integer(dums[,lev]), 12 as.integer(cardnb), PACKAGE="spdep") 13 } 14 res 15} 16 17joincount.test <- function(fx, listw, zero.policy=NULL, 18 alternative="greater", sampling="nonfree", 19 spChk=NULL, adjust.n=TRUE) { 20 if (is.null(zero.policy)) 21 zero.policy <- get("zeroPolicy", envir = .spdepOptions) 22 stopifnot(is.logical(zero.policy)) 23 alternative <- match.arg(alternative, c("greater", "less", "two.sided")) 24 sampling <- match.arg(sampling, c("nonfree", "free")) 25 if (!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)), 26 "is not a listw object")) 27 if (!is.factor(fx)) stop(paste(deparse(substitute(x)), 28 "is not a factor")) 29 if (any(is.na(fx))) stop("NA in factor") 30 n <- length(listw$neighbours) 31 if (n != length(fx)) stop("objects of different length") 32 cards <- card(listw$neighbours) 33 if (!zero.policy && any(cards == 0)) 34 stop("regions with no neighbours found") 35 if (is.null(spChk)) spChk <- get.spChkOption() 36 if (spChk && !chkIDs(fx, listw)) 37 stop("Check of data and weights ID integrity failed") 38 39 wc <- spweights.constants(listw, zero.policy=zero.policy, 40 adjust.n=adjust.n) 41 S02 <- wc$S0*wc$S0 42 43 ff <- ~ fx - 1 44 dums <- model.matrix(ff, model.frame(ff)) 45 BB <- joincount(dums, listw) 46 nBB <- length(BB) 47 if (nBB < 1) stop("non-positive BB length") 48 res <- vector(mode="list", length=nBB) 49 tab <- table(fx) 50 BB5 <- 0.5 * BB 51 ntab <- as.numeric(as.vector(tab)) 52# comment and bug report by Tomoki NAKAYA about no-neighbour observations 53# if (adjust.n) { 54 N <- wc$n 55# } else { 56# N <- n 57# wc$n1 <- N-1 58# wc$n2 <- N-2 59# wc$n3 <- N-3 60# } 61 if (sampling == "nonfree") { 62 Ejc <- (wc$S0*(ntab*(ntab-1))) / (2*N*wc$n1) # CO 1981 p. 20 (1.31) 63 Vjc <- (wc$S1*(ntab*(ntab-1))) / (N*wc$n1) 64 Vjc <- Vjc + (((wc$S2 - 2*wc$S1)*ntab*(ntab-1)*(ntab-2)) / 65 (N*wc$n1*wc$n2)) 66 Vjc <- Vjc + (((S02 + wc$S1 - wc$S2)*ntab*(ntab-1)*(ntab-2)* 67 (ntab-3)) / (N*wc$n1*wc$n2*wc$n3)) 68 Vjc <- (0.25 * Vjc) - Ejc^2 # CO 1981 p. 20 (1.32) 69 } else if (sampling == "free") { 70 p <- ntab/n 71 Ejc <- (wc$S0*(p^2))/2 # CO 1981 p. 20 (1.25) 72 Vjc <- ((wc$S1*(p^2)) + 73 ((wc$S2-2*wc$S1)*(p^3)) + 74 ((wc$S1-wc$S2)*(p^4)))/4 # CO 1981 p. 20 (1.26) 75 } else stop("sampling must be nonfree or free") 76 for (i in 1:nBB) { 77 estimate <- c(BB5[i], Ejc[i], Vjc[i]) 78 names(estimate) <- c("Same colour statistic", 79 "Expectation", "Variance") 80 statistic <- (BB5[i] - Ejc[i]) / sqrt(Vjc[i]) 81 names(statistic) <- paste("Std. deviate for", names(tab)[i]) 82 p.value <- NA 83 if (is.finite(statistic)) { 84 if (alternative == "two.sided") 85 p.value <- 2 * pnorm(abs(statistic), lower.tail=FALSE) 86 else if (alternative == "greater") 87 p.value <- pnorm(statistic, lower.tail=FALSE) 88 else p.value <- pnorm(statistic) 89 if (!is.finite(p.value) || p.value < 0 || p.value > 1) 90 warning("Out-of-range p-value: reconsider test arguments") 91 } 92 method <- paste0("Join count test under ", sampling, 93 " sampling") 94 data.name <- paste(deparse(substitute(fx)), "\nweights:", 95 deparse(substitute(listw)), "\n") 96 res[[i]] <- list(statistic=statistic, p.value=p.value, 97 estimate=estimate, method=method, 98 alternative=alternative, data.name=data.name) 99 class(res[[i]]) <- "htest" 100 } 101 class(res) <- "jclist" 102 res 103} 104 105print.jclist <- function(x, ...) { 106 for (i in seq(along=x)) print(x[[i]], ...) 107 invisible(x) 108} 109 110joincount.mc <- function(fx, listw, nsim, zero.policy=FALSE, 111 alternative="greater", spChk=NULL) { 112 alternative <- match.arg(alternative, c("greater", "less")) 113 if(!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)), 114 "is not a listw object")) 115 if(!is.factor(fx)) stop(paste(deparse(substitute(fx)), 116 "is not a factor")) 117 if(missing(nsim)) stop("nsim must be given") 118 if (any(is.na(fx))) stop("NA in factor") 119 n <- length(listw$neighbours) 120 if (n != length(fx)) stop("objects of different length") 121 cards <- card(listw$neighbours) 122 if (!zero.policy && any(cards == 0)) 123 stop("regions with no neighbours found") 124 if (is.null(spChk)) spChk <- get.spChkOption() 125 if (spChk && !chkIDs(fx, listw)) 126 stop("Check of data and weights ID integrity failed") 127 gamres <- suppressWarnings(nsim > gamma(n + 1)) 128 if (gamres) stop("nsim too large for this number of observations") 129 130 ff <- ~ fx - 1 131 dums <- model.matrix(ff, model.frame(ff)) 132 nc <- ncol(dums) 133 if (nc < 1) stop("non-positive nc") 134 if (nsim < 1) stop("non-positive nsim") 135 res <- matrix(0, nrow=nsim+1, ncol=nc) 136 res[nsim+1,] <- 0.5 * joincount(dums, listw) 137 tab <- table(fx) 138 for (i in 1:nsim) { 139 fxi <- sample(fx) 140 ff <- ~ fxi - 1 141 dums <- model.matrix(ff, model.frame(ff)) 142 res[i,] <- 0.5 * joincount(dums, listw) 143 } 144 rankres <- apply(res, 2, rank) 145 xrank <- rankres[nrow(rankres),] 146 lres <- vector(mode="list", length=nc) 147 for (i in 1:nc) { 148 statistic <- res[nrow(res), i] 149 names(statistic) <- paste("Join-count statistic for", 150 names(tab)[i]) 151 parameter <- xrank[i] 152 names(parameter) <- "rank of observed statistic" 153 diff <- nsim - xrank[i] 154 diff <- ifelse(diff > 0, diff, 0) 155 if (alternative == "less") 156 pval <- punif((diff + 1)/(nsim + 1), lower.tail=FALSE) 157 else if (alternative == "greater") 158 pval <- punif((diff + 1)/(nsim + 1)) 159 if (!is.finite(pval) || pval < 0 || pval > 1) 160 warning("Out-of-range p-value: reconsider test arguments") 161 162 method <- "Monte-Carlo simulation of join-count statistic" 163 data.name <- paste(deparse(substitute(fx)), "\nweights:", 164 deparse(substitute(listw)), 165 "\nnumber of simulations + 1:", nsim+1, "\n") 166 estimate <- c(mean(res[-(nrow(res)), i]), 167 var(res[-(nrow(res)), i])) 168 names(estimate) <- c("mean of simulation", 169 "variance of simulation") 170 lres[[i]] <- list(statistic=statistic, parameter=parameter, 171 method=method, data.name=data.name, p.value=pval, 172 alternative=alternative, estimate=estimate, res=res[,i]) 173 class(lres[[i]]) <- c("htest", "mc.sim") 174 175 } 176 class(lres) <- "jclist" 177 lres 178} 179 180 181 182joincount.multi <- function(fx, listw, zero.policy=FALSE, #adjust.n=TRUE, 183 spChk=NULL, adjust.n=TRUE) { 184 if(!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)), 185 "is not a listw object")) 186 if(!is.factor(fx)) stop(paste(deparse(substitute(fx)), 187 "is not a factor")) 188 if (any(is.na(fx))) stop("NA in factor") 189 n <- length(listw$neighbours) 190 if (n != length(fx)) stop("objects of different length") 191 cards <- card(listw$neighbours) 192 if (!zero.policy && any(cards == 0)) 193 stop("regions with no neighbours found") 194 if (is.null(spChk)) spChk <- get.spChkOption() 195 if (spChk && !chkIDs(fx, listw)) 196 stop("Check of data and weights ID integrity failed") 197 ifx <- as.integer(fx) 198 k <- length(levels(fx)) 199 if (k < 2) stop("must be at least two levels in factor") 200 201 sn <- listw2sn(listw) 202 y <- factor(paste(ifx[sn[,1]], ":", ifx[sn[,2]], sep=""), 203 levels=as.vector(outer(1:k, 1:k, 204 FUN=function(X,Y) paste(X,Y,sep=":")))) 205 res <- matrix(tapply(sn[,3], y, sum), ncol=k)/2 206 207 res[is.na(res)] <- 0 208 rownames(res) <- colnames(res) <- levels(fx) 209 210 tab <- table(fx) 211 ntab <- as.numeric(as.vector(tab)) 212 wc <- spweights.constants(listw, zero.policy=zero.policy, 213 adjust.n=adjust.n) 214# comment and bug report by Tomoki NAKAYA about no-neighbour observations 215# if (adjust.n) { 216 N <- wc$n 217# } else { 218# N <- n 219# wc$n1 <- N-1 220# wc$n2 <- N-2 221# wc$n3 <- N-3 222# } 223 S02 <- wc$S0*wc$S0 224 225 Ejc <- (wc$S0*(ntab*(ntab-1))) / (2*N*wc$n1) 226 227 Vjc <- (wc$S1*(ntab*(ntab-1))) / (N*wc$n1) 228 Vjc <- Vjc + (((wc$S2 - 2*wc$S1)*ntab*(ntab-1)*(ntab-2)) / 229 (N*wc$n1*wc$n2)) 230 Vjc <- Vjc + (((S02 + wc$S1 - wc$S2)*ntab*(ntab-1)*(ntab-2)* 231 (ntab-3)) / (N*wc$n1*wc$n2*wc$n3)) 232 Vjc <- (0.25 * Vjc) - Ejc^2 233 234 nrns <- function(x, op="*") { 235 k <- length(x) 236 res <- numeric(((k^2) - k)/2) 237 ii <- 1 238 for (i in 2:k) { 239 for (j in 1:(i-1)) { 240 if (is.character(op) && op == "*") { 241 res[ii] <- x[i]*x[j] 242 } else if (is.character(op) && op == "+") { 243 res[ii] <- x[i]+x[j] 244 } 245 ii <- ii+1 246 } 247 } 248 res 249 } 250 251 ldiag <- numeric(((k^2) - k)/2) 252 diffcolnames <- character(((k^2) - k)/2) 253 ii <- 1 254 for (i in 2:k) { 255 for (j in 1:(i-1)) { 256 ldiag[ii] <- res[i,j] + res[j,i] 257 diffcolnames[ii] <- paste(levels(fx)[i], 258 levels(fx)[j], sep=":") 259 ii <- ii+1 260 } 261 } 262 263 Exp <- (wc$S0*(nrns(ntab, op="*"))) / (N*wc$n1) 264 Var <- (2*wc$S1*nrns(ntab, op="*"))/(N*wc$n1) 265 Var <- Var + (((wc$S2 - 2*wc$S1)*nrns(ntab, op="*")* 266 (nrns(ntab, op="+")-2))/(N*wc$n1*wc$n2)) 267 Var <- Var + ((4*(S02 + wc$S1 - wc$S2)*nrns((ntab*(ntab-1)), op="*")) / 268 (N*wc$n1*wc$n2*wc$n3)) 269 Var <- (0.25 * Var) - Exp^2 270 Jtot <- sum(ldiag) 271 JtotExp <- sum(Exp) 272 Jvar <- ((wc$S2/(N*wc$n1))-((4*(S02 + wc$S1 - wc$S2)*wc$n1) / 273 (N*wc$n1*wc$n2*wc$n3)))*sum(nrns(ntab, op="*")) 274 Jvar <- Jvar + 4*(((wc$S1 - wc$S2)/(N*wc$n1*wc$n2*wc$n3)) + 275 ((2*S02*(2*n-3))/((N*wc$n1)*(N*wc$n1*wc$n2*wc$n3))))* 276 sum(nrns(ntab^2, op="*")) 277 if(k>2) { 278 ntnsnr <- as.numeric(0) 279 for (r in 1:(k-2)) { 280 for (s in (r+1):(k-1)) { 281 for (t in (s+1):(k)) { 282 ntnsnr <- ntnsnr + 283 ntab[r]*ntab[s]*ntab[t] 284 } 285 } 286 } 287 Jvar <- Jvar + (((2*wc$S1 - 5*wc$S2)/(N*wc$n1*wc$n2))+ 288 ((12*(S02 + wc$S1 - wc$S2))/(N*wc$n1*wc$n2*wc$n3))+ 289 ((8*S02)/((N*wc$n1*wc$n2)*wc$n1)))*ntnsnr 290 } 291 if(k>3) { 292 nuntnsnr <- as.numeric(0) 293 for (r in 1:(k-3)) { 294 for (s in (r+1):(k-2)) { 295 for (t in (s+1):(k-1)) { 296 for (u in (t+1):(k)) { 297 nuntnsnr <- nuntnsnr + 298 ntab[r]*ntab[s]*ntab[t]*ntab[u] 299 } 300 } 301 } 302 } 303 Jvar <- Jvar - 8*(((wc$S1 - wc$S2)/(N*wc$n1*wc$n2*wc$n3))+ 304 ((2*S02*(2*N-3))/((N*wc$n1)*(N*wc$n1*wc$n2*wc$n3))))*nuntnsnr 305 } 306 Jvar <- (0.25 * Jvar) 307 statistic <- (c(diag(res), ldiag, Jtot) - c(Ejc, Exp, JtotExp)) / 308 sqrt(c(Vjc, Var, Jvar)) 309 lres <- cbind(c(diag(res), ldiag, Jtot), c(Ejc, Exp, JtotExp), 310 c(Vjc, Var, Jvar), statistic) 311 colnames(lres) <- c("Joincount", "Expected", "Variance", 312 "z-value") 313 rownames(lres) <- c(paste(levels(fx), ":", levels(fx), sep=""), 314 diffcolnames, "Jtot") 315 class(lres) <- c("jcmulti", "matrix") 316 lres 317} 318 319print.jcmulti <- function(x, ...) { 320 printCoefmat(x, ...) 321} 322 323 324 325 326