1# Copyright 2001-6 by Roger Bivand, bugfix large n Ronnie Babigumira
2#
3
4joincount <- function(dums, listw) {
5	nc <- which(colSums(dums) > 1)
6#	n <- length(listw$neighbours)
7	cardnb <- card(listw$neighbours)
8	res <- as.numeric(rep(0, ncol(dums)))
9	for (lev in nc) {
10		res[lev] <- .Call("jcintern", listw$neighbours,
11			listw$weights, as.integer(dums[,lev]),
12			as.integer(cardnb), PACKAGE="spdep")
13	}
14	res
15}
16
17joincount.test <- function(fx, listw, zero.policy=NULL,
18	alternative="greater", sampling="nonfree",
19	spChk=NULL, adjust.n=TRUE) {
20        if (is.null(zero.policy))
21            zero.policy <- get("zeroPolicy", envir = .spdepOptions)
22        stopifnot(is.logical(zero.policy))
23	alternative <- match.arg(alternative, c("greater", "less", "two.sided"))
24	sampling <- match.arg(sampling, c("nonfree", "free"))
25        if (!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)),
26		"is not a listw object"))
27	if (!is.factor(fx)) stop(paste(deparse(substitute(x)),
28		"is not a factor"))
29	if (any(is.na(fx))) stop("NA in factor")
30	n <- length(listw$neighbours)
31	if (n != length(fx)) stop("objects of different length")
32	cards <- card(listw$neighbours)
33	if (!zero.policy && any(cards == 0))
34		stop("regions with no neighbours found")
35	if (is.null(spChk)) spChk <- get.spChkOption()
36	if (spChk && !chkIDs(fx, listw))
37		stop("Check of data and weights ID integrity failed")
38
39	wc <- spweights.constants(listw, zero.policy=zero.policy,
40		adjust.n=adjust.n)
41	S02 <- wc$S0*wc$S0
42
43	ff <- ~ fx - 1
44	dums <- model.matrix(ff, model.frame(ff))
45	BB <- joincount(dums, listw)
46	nBB <- length(BB)
47	if (nBB < 1) stop("non-positive BB length")
48	res <- vector(mode="list", length=nBB)
49	tab <- table(fx)
50	BB5 <- 0.5 * BB
51	ntab <- as.numeric(as.vector(tab))
52# comment and bug report by Tomoki NAKAYA about no-neighbour observations
53#	if (adjust.n) {
54		N <- wc$n
55#	} else {
56#		N <- n
57#		wc$n1 <- N-1
58#		wc$n2 <- N-2
59#		wc$n3 <- N-3
60#	}
61        if (sampling == "nonfree") {
62	  Ejc <- (wc$S0*(ntab*(ntab-1))) / (2*N*wc$n1) # CO 1981 p. 20 (1.31)
63	  Vjc <- (wc$S1*(ntab*(ntab-1))) / (N*wc$n1)
64	  Vjc <- Vjc + (((wc$S2 - 2*wc$S1)*ntab*(ntab-1)*(ntab-2)) /
65		(N*wc$n1*wc$n2))
66	  Vjc <- Vjc + (((S02 + wc$S1 - wc$S2)*ntab*(ntab-1)*(ntab-2)*
67		(ntab-3)) / (N*wc$n1*wc$n2*wc$n3))
68	  Vjc <- (0.25 * Vjc) - Ejc^2 # CO 1981 p. 20 (1.32)
69        } else if (sampling == "free") {
70          p <- ntab/n
71          Ejc <- (wc$S0*(p^2))/2 # CO 1981 p. 20 (1.25)
72          Vjc <- ((wc$S1*(p^2)) +
73            ((wc$S2-2*wc$S1)*(p^3)) +
74            ((wc$S1-wc$S2)*(p^4)))/4 # CO 1981 p. 20 (1.26)
75        } else stop("sampling must be nonfree or free")
76	for (i in 1:nBB) {
77		estimate <- c(BB5[i], Ejc[i], Vjc[i])
78		names(estimate) <- c("Same colour statistic",
79			"Expectation", "Variance")
80		statistic <- (BB5[i] - Ejc[i]) / sqrt(Vjc[i])
81		names(statistic) <- paste("Std. deviate for", names(tab)[i])
82		p.value <- NA
83		if (is.finite(statistic)) {
84		    if (alternative == "two.sided")
85			p.value <- 2 * pnorm(abs(statistic), lower.tail=FALSE)
86		    else if (alternative == "greater")
87			p.value <- pnorm(statistic, lower.tail=FALSE)
88		    else p.value <- pnorm(statistic)
89		    if (!is.finite(p.value) || p.value < 0 || p.value > 1)
90		      warning("Out-of-range p-value: reconsider test arguments")
91		}
92		method <- paste0("Join count test under ", sampling,
93                  " sampling")
94		data.name <- paste(deparse(substitute(fx)), "\nweights:",
95			deparse(substitute(listw)), "\n")
96		res[[i]] <- list(statistic=statistic, p.value=p.value,
97			estimate=estimate, method=method,
98			alternative=alternative, data.name=data.name)
99		class(res[[i]]) <- "htest"
100	}
101	class(res) <- "jclist"
102	res
103}
104
105print.jclist <- function(x, ...) {
106	for (i in seq(along=x)) print(x[[i]], ...)
107	invisible(x)
108}
109
110joincount.mc <- function(fx, listw, nsim, zero.policy=FALSE,
111	alternative="greater", spChk=NULL) {
112	alternative <- match.arg(alternative, c("greater", "less"))
113	if(!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)),
114		"is not a listw object"))
115	if(!is.factor(fx)) stop(paste(deparse(substitute(fx)),
116		"is not a factor"))
117	if(missing(nsim)) stop("nsim must be given")
118	if (any(is.na(fx))) stop("NA in factor")
119	n <- length(listw$neighbours)
120	if (n != length(fx)) stop("objects of different length")
121	cards <- card(listw$neighbours)
122	if (!zero.policy && any(cards == 0))
123		stop("regions with no neighbours found")
124	if (is.null(spChk)) spChk <- get.spChkOption()
125	if (spChk && !chkIDs(fx, listw))
126		stop("Check of data and weights ID integrity failed")
127        gamres <- suppressWarnings(nsim > gamma(n + 1))
128        if (gamres) stop("nsim too large for this number of observations")
129
130	ff <- ~ fx - 1
131	dums <- model.matrix(ff, model.frame(ff))
132	nc <- ncol(dums)
133	if (nc < 1) stop("non-positive nc")
134	if (nsim < 1) stop("non-positive nsim")
135	res <- matrix(0, nrow=nsim+1, ncol=nc)
136	res[nsim+1,] <- 0.5 * joincount(dums, listw)
137	tab <- table(fx)
138	for (i in 1:nsim) {
139		fxi <- sample(fx)
140		ff <- ~ fxi - 1
141		dums <- model.matrix(ff, model.frame(ff))
142		res[i,] <- 0.5 * joincount(dums, listw)
143	}
144	rankres <- apply(res, 2, rank)
145	xrank <- rankres[nrow(rankres),]
146	lres <- vector(mode="list", length=nc)
147	for (i in 1:nc) {
148		statistic <- res[nrow(res), i]
149		names(statistic) <- paste("Join-count statistic for",
150			names(tab)[i])
151		parameter <- xrank[i]
152		names(parameter) <- "rank of observed statistic"
153		diff <- nsim - xrank[i]
154		diff <- ifelse(diff > 0, diff, 0)
155        	if (alternative == "less")
156        		pval <- punif((diff + 1)/(nsim + 1), lower.tail=FALSE)
157    		else if (alternative == "greater")
158        		pval <- punif((diff + 1)/(nsim + 1))
159		if (!is.finite(pval) || pval < 0 || pval > 1)
160		    warning("Out-of-range p-value: reconsider test arguments")
161
162		method <- "Monte-Carlo simulation of join-count statistic"
163		data.name <- paste(deparse(substitute(fx)), "\nweights:",
164			deparse(substitute(listw)),
165			"\nnumber of simulations + 1:", nsim+1, "\n")
166		estimate <- c(mean(res[-(nrow(res)), i]),
167			var(res[-(nrow(res)), i]))
168		names(estimate) <- c("mean of simulation",
169			"variance of simulation")
170		lres[[i]] <- list(statistic=statistic, parameter=parameter,
171			method=method, data.name=data.name, p.value=pval,
172			alternative=alternative, estimate=estimate, res=res[,i])
173		class(lres[[i]]) <- c("htest", "mc.sim")
174
175	}
176	class(lres) <- "jclist"
177	lres
178}
179
180
181
182joincount.multi <- function(fx, listw, zero.policy=FALSE, #adjust.n=TRUE,
183	spChk=NULL, adjust.n=TRUE) {
184	if(!inherits(listw, "listw")) stop(paste(deparse(substitute(listw)),
185		"is not a listw object"))
186	if(!is.factor(fx)) stop(paste(deparse(substitute(fx)),
187		"is not a factor"))
188	if (any(is.na(fx))) stop("NA in factor")
189	n <- length(listw$neighbours)
190	if (n != length(fx)) stop("objects of different length")
191	cards <- card(listw$neighbours)
192	if (!zero.policy && any(cards == 0))
193		stop("regions with no neighbours found")
194	if (is.null(spChk)) spChk <- get.spChkOption()
195	if (spChk && !chkIDs(fx, listw))
196		stop("Check of data and weights ID integrity failed")
197	ifx <- as.integer(fx)
198	k <- length(levels(fx))
199	if (k < 2) stop("must be at least two levels in factor")
200
201	sn <- listw2sn(listw)
202	y <- factor(paste(ifx[sn[,1]], ":", ifx[sn[,2]], sep=""),
203		levels=as.vector(outer(1:k, 1:k,
204			FUN=function(X,Y) paste(X,Y,sep=":"))))
205	res <- matrix(tapply(sn[,3], y, sum), ncol=k)/2
206
207	res[is.na(res)] <- 0
208	rownames(res) <- colnames(res) <- levels(fx)
209
210	tab <- table(fx)
211	ntab <- as.numeric(as.vector(tab))
212	wc <- spweights.constants(listw, zero.policy=zero.policy,
213		adjust.n=adjust.n)
214# comment and bug report by Tomoki NAKAYA about no-neighbour observations
215#	if (adjust.n) {
216		N <- wc$n
217#	} else {
218#		N <- n
219#		wc$n1 <- N-1
220#		wc$n2 <- N-2
221#		wc$n3 <- N-3
222#	}
223	S02 <- wc$S0*wc$S0
224
225	Ejc <- (wc$S0*(ntab*(ntab-1))) / (2*N*wc$n1)
226
227	Vjc <- (wc$S1*(ntab*(ntab-1))) / (N*wc$n1)
228	Vjc <- Vjc + (((wc$S2 - 2*wc$S1)*ntab*(ntab-1)*(ntab-2)) /
229		(N*wc$n1*wc$n2))
230	Vjc <- Vjc + (((S02 + wc$S1 - wc$S2)*ntab*(ntab-1)*(ntab-2)*
231		(ntab-3)) / (N*wc$n1*wc$n2*wc$n3))
232	Vjc <- (0.25 * Vjc) - Ejc^2
233
234	nrns <- function(x, op="*") {
235		k <- length(x)
236		res <- numeric(((k^2) - k)/2)
237		ii <- 1
238		for (i in 2:k) {
239			for (j in 1:(i-1)) {
240				if (is.character(op) && op == "*") {
241					res[ii] <- x[i]*x[j]
242				} else if (is.character(op) && op == "+") {
243					res[ii] <- x[i]+x[j]
244				}
245				ii <- ii+1
246			}
247		}
248		res
249	}
250
251	ldiag <- numeric(((k^2) - k)/2)
252	diffcolnames <- character(((k^2) - k)/2)
253	ii <- 1
254	for (i in 2:k) {
255		for (j in 1:(i-1)) {
256			ldiag[ii] <- res[i,j] + res[j,i]
257			diffcolnames[ii] <- paste(levels(fx)[i],
258				levels(fx)[j], sep=":")
259			ii <- ii+1
260		}
261	}
262
263	Exp <- (wc$S0*(nrns(ntab, op="*"))) / (N*wc$n1)
264	Var <- (2*wc$S1*nrns(ntab, op="*"))/(N*wc$n1)
265	Var <- Var + (((wc$S2 - 2*wc$S1)*nrns(ntab, op="*")*
266		(nrns(ntab, op="+")-2))/(N*wc$n1*wc$n2))
267	Var <- Var + ((4*(S02 + wc$S1 - wc$S2)*nrns((ntab*(ntab-1)), op="*")) /
268		(N*wc$n1*wc$n2*wc$n3))
269	Var <- (0.25 * Var) - Exp^2
270	Jtot <- sum(ldiag)
271	JtotExp <- sum(Exp)
272	Jvar <- ((wc$S2/(N*wc$n1))-((4*(S02 + wc$S1 - wc$S2)*wc$n1) /
273		(N*wc$n1*wc$n2*wc$n3)))*sum(nrns(ntab, op="*"))
274	Jvar <- Jvar + 4*(((wc$S1 - wc$S2)/(N*wc$n1*wc$n2*wc$n3)) +
275		((2*S02*(2*n-3))/((N*wc$n1)*(N*wc$n1*wc$n2*wc$n3))))*
276		sum(nrns(ntab^2, op="*"))
277	if(k>2) {
278		ntnsnr <- as.numeric(0)
279		for (r in 1:(k-2)) {
280			for (s in (r+1):(k-1)) {
281				for (t in (s+1):(k)) {
282					ntnsnr <- ntnsnr +
283						ntab[r]*ntab[s]*ntab[t]
284				}
285			}
286		}
287		Jvar <- Jvar + (((2*wc$S1 - 5*wc$S2)/(N*wc$n1*wc$n2))+
288		((12*(S02 + wc$S1 - wc$S2))/(N*wc$n1*wc$n2*wc$n3))+
289		((8*S02)/((N*wc$n1*wc$n2)*wc$n1)))*ntnsnr
290	}
291	if(k>3) {
292		nuntnsnr <- as.numeric(0)
293		for (r in 1:(k-3)) {
294			for (s in (r+1):(k-2)) {
295				for (t in (s+1):(k-1)) {
296					for (u in (t+1):(k)) {
297						nuntnsnr <- nuntnsnr +
298						ntab[r]*ntab[s]*ntab[t]*ntab[u]
299					}
300				}
301			}
302		}
303		Jvar <- Jvar - 8*(((wc$S1 - wc$S2)/(N*wc$n1*wc$n2*wc$n3))+
304		((2*S02*(2*N-3))/((N*wc$n1)*(N*wc$n1*wc$n2*wc$n3))))*nuntnsnr
305	}
306	Jvar <- (0.25 * Jvar)
307	statistic <- (c(diag(res), ldiag, Jtot) - c(Ejc, Exp, JtotExp)) /
308		sqrt(c(Vjc, Var, Jvar))
309	lres <- cbind(c(diag(res), ldiag, Jtot), c(Ejc, Exp, JtotExp),
310		c(Vjc, Var, Jvar), statistic)
311	colnames(lres) <- c("Joincount", "Expected", "Variance",
312		"z-value")
313	rownames(lres) <- c(paste(levels(fx), ":", levels(fx), sep=""),
314		diffcolnames, "Jtot")
315	class(lres) <- c("jcmulti", "matrix")
316	lres
317}
318
319print.jcmulti <- function(x, ...) {
320	printCoefmat(x, ...)
321}
322
323
324
325
326