#adapted from John Fox's Polychor #this does all the work #the following two functions are called repeatedly by tetrac and are put here to speed up the process "tetraBinBvn" <- function (rho,rc,cc) #adapted from John Fox's polychor { row.cuts <- c(-Inf, rc, Inf) col.cuts <- c(-Inf, cc, Inf) P <- matrix(0, 2,2) R <- matrix(c(1, rho, rho, 1), 2, 2) for (i in 1:2) { for (j in 1:2) { P[i, j] <- pmvnorm(lower = c(row.cuts[i], col.cuts[j]), upper = c(row.cuts[i + 1], col.cuts[j + 1]), corr = R) }} P #the estimated 2 x 2 predicted by rho, rc, cc } "tetraF" <- function(rho,cc,rc,tab) { P <- tetraBinBvn(rho, cc, rc) -sum(tab * log(P)) } #the ML criterion to be minimized "tetrac" <- function(x,y=NULL,taux,tauy,i,j,correct=TRUE,global=TRUE) { if(is.null(y)) {tab <- x} else {tab <- table(x,y)} if(length(tab) < 4) {warning("For i = ", i," j = ",j, " No variance for either i or j rho set to NA") result <- list(rho=NA,tau=c(NA,NA),objective=NA) } else { if((sum(tab) > 1) && (min(tab) == 0) && correct) { warning("For i = ", i," j = ",j, " A cell entry of 0 was replaced with .5. Check your data!") tab[tab==0] <-.5 #correction for continuity } if(global) {cc <- taux rc <- tauy } else { tot <- sum(tab) tab <- tab/tot rc <- qnorm(colSums(tab))[1] cc <- qnorm(rowSums(tab))[1] } rho <- optimize(tetraF,interval=c(-1,1),rc=rc,cc=cc,tab=tab) result <- list(rho=rho$minimum,tau=c(cc,rc),objective=rho$objective) } return(result) } #repeatedly do the analysis to form a matrix of output "tetra.mat" <- function(x,y=NULL,correct=TRUE,smooth=TRUE,global=TRUE) {nvar <- dim(x)[2] x <- x -min(x,na.rm=TRUE) #in case the numbers are not 0,1 n.obs <- dim(x)[1] if(is.null(y)) { if(max(x,na.rm=TRUE) > 1) {stop("Tetrachoric correlations require dictomous data")} tau <- -qnorm(colMeans(x,na.rm=TRUE)) mat <- matrix(0,nvar,nvar) colnames(mat) <- rownames(mat) <- colnames(x) names(tau) <- colnames(x) #cat("\nFinding the tetrachoric correlations\n") for (i in 2:nvar) { progressBar(i^2/2,nvar^2/2,"Tetrachoric") for (j in 1:(i-1)) { if(t(!is.na(x[,i]))%*% (!is.na(x[,j])) > 2 ) { tetra <- tetrac(x[,i],x[,j],tau[i],tau[j],i,j,correct=correct,global=global) mat[i,j] <- mat[j,i] <- tetra$rho} else {mat[i,j] <- mat[j,i] <- NA} } } diag(mat) <- 1 if(any(is.na(mat))) {warning("some correlations are missing, smoothing turned off") smooth <- FALSE} if(smooth) {mat <- cor.smooth(mat) } #makes it positive semidefinite result <- list(rho = mat,tau = tau,n.obs=n.obs) } else { # the case of having a y variable y <- y -min(y,na.rm=TRUE) #in case the numbers are not 0,1 if(is.matrix(y)) {ny <- dim(y)[2] tauy <- -qnorm(colMeans(y,na.rm=TRUE)) n.obs.y <- dim(y)[1] } else { ny <- 1 n.obs.y <- length(y)} tauy <- -qnorm(mean(y,na.rm=TRUE)) y <- as.matrix(y) if(dim(x)[1] != n.obs.y) {stop("x and y must have the same number of observations")} taux <- -qnorm(colMeans(x,na.rm=TRUE)) nx <- dim(x)[2] mat <- matrix(0,nx,ny) colnames(mat) <- colnames(y) rownames(mat) <- colnames(x) for (i in 1:nx) { for (j in 1:ny) {tetra <- tetrac(x[,i],y[,j],taux[i],tauy[j],correct=correct) mat[i,j] <- tetra$rho } } result <- list(rho = mat,tau = taux,tauy= tauy,n.obs=n.obs) } flush(stdout()) cat("\n" ) #put in to clear the progress bar return(result) } #convert comorbidity type numbers to a table pqr <- function(q1,q2=NULL,p=NULL) { if(length(q1) > 1) { q2 <- q1[2] p <- q1[3] q1 <- q1[1]} tab <- matrix(0,2,2) tab[1,1] <- p tab[2,1] <- q1-p tab[1,2] <- q2-p tab[2,2] <- 1-q1 - tab[1,2] return(tab)} #the public function "tetrachoric" <- function(x,y=NULL,correct=TRUE,smooth=TRUE,global=TRUE) { if(!require(mvtnorm)) {stop("I am sorry, you must have mvtnorm installed to use tetrachoric")} cl <- match.call() if (!is.matrix(x) && !is.data.frame(x)) { if (length(x) ==4) {x <- matrix(x,2,2) } else { if(length(x) ==3 ) {x <- pqr(x) } else { stop("Data must be either a 1 x 4 vector, a 2 x 2 matrix, or a data.frame/matrix of data")} }} nvar <- dim(x)[2] n.obs <- dim(x)[1] if (n.obs == nvar) {result <- tetrac(x,correct=correct,i=1,j=1,global=FALSE)} else { result <- tetra.mat(x,y=y,correct=correct,smooth=smooth,global=global)} result$Call <- cl class(result) <- c("psych","tetra") return(result) } "tetrachor" <- function(x,correct=TRUE) { if(!require(mvtnorm)) {stop("I am sorry, you must have mvtnorm installed to use tetrachor")} cl <- match.call() if (!is.matrix(x) && !is.data.frame(x)) { if (length(x) ==4) {x <- matrix(x,2,2) } else { if(length(x) ==3 ) {x <- pqr(x) } else { stop("Data must be either a 1 x 4 vector, a 2 x 2 matrix, or a data.frame/matrix of data")} }} nvar <- dim(x)[2] if (dim(x)[1] == nvar) {result <- tetrac(x,correct=correct)} else { result <- tetra.mat(x,correct=correct)} result$Call <- cl class(result) <- c("psych","tetra") return(result) } #does the work "biserialc" <- function(x,y,i,j) { cc <- complete.cases(x,y) x <- x[cc] y <- y[cc] yf <- as.factor(y) lev <- levels(yf) if(length(lev)!=2) {#stop("y is not a dichotomous variable") warning("For x = ",i, " y = ", j, " y is not dichotomous") r <- NA} else { ty <- table(y) tot <- sum(ty) tab <- ty/tot if(length(tab) < 2) {r <- NA warning("For x = ",i, " y = ", j, " no variance for y r set to NA")} else { #this treats the case of no variance in the dichotmous variable zp <- dnorm(qnorm(tab[2])) hi <- mean(x[y==lev[2]],na.rm=TRUE) lo <- mean(x[y==lev[1]],na.ram=TRUE) # r <- (hi - lo)*sqrt(prod(tab))/(sd(x,na.rm=TRUE)) #point biserial r <- (hi - lo)*(prod(tab))/(zp * sd(x,na.rm=TRUE)) if(!is.na(r) && r >1) { r <- 1 #in case we are correlating a dichotomous variable with itself warning("For x = ",i, " y = ", j, " x seems to be dichotomous, not continuous") }}} return(r) } "biserial" <- function(x,y) { x <- as.matrix(x) y <- as.matrix(y) nx <- dim(x)[2] ny <- dim(y)[2] if(is.null(nx)) nx <- 1 if(is.null(ny)) ny <- 1 mat <- matrix(NA,nrow=ny,ncol=nx) colnames(mat) <- colnames(x) rownames(mat) <- colnames(y) #cat("\n Finding the biserial correlations\n") for(i in 1:ny) { progressBar(i*(i-1)/2,ny^2/2,"Biserial") for (j in 1:nx) { mat[i,j] <- biserialc(x[,j],y[,i],j,i) }} flush(stdout()) cat("\n" ) #put in to clear the progress bar return(mat) } "polyserial" <- function(x,y) { # y <- matrix(y) min.item <- min(y, na.rm = TRUE) max.item <- max(y, na.rm = TRUE) if(is.null(ncol(y))) {n.var <- 1 n.cases <- length(y) } else {n.var <- ncol(y) n.cases <- dim(y)[1]} dummy <- matrix(rep(min.item:max.item, n.var), ncol = n.var) colnames(dummy) <- names(y) xdum <- rbind(y, dummy) frequency <- apply(xdum, 2, table) frequency <- t(frequency - 1) responses <- rowSums(frequency) frequency <- frequency/responses frequency <- t(apply(frequency,1,cumsum)) len <- dim(frequency)[2] tau <- dnorm(qnorm(frequency[,-len,drop=FALSE])) stau <- rowSums(tau) rxy <- cor(x,y,use="pairwise") sdy <- apply(y,2,sd,na.rm=TRUE) rps <- t(rxy) * sqrt((n.cases-1)/n.cases) * sdy/stau rps[rps > 1.0] <- 1.0 rps[rps < -1.0] <- -1.0 return(rps) } "cor.smooth" <- function(x) { eigens <- eigen(x) if(min(eigens$values) < .Machine$double.eps) {warning("Matrix was not positive definite, smoothing was done") eigens$values[eigens$values < .Machine$double.eps] <- 100 * .Machine$double.eps nvar <- dim(x)[1] tot <- sum(eigens$values) eigens$values <- eigens$values * nvar/tot cnames <- colnames(x) rnames <- rownames(x) x <- eigens$vectors %*% diag(eigens$values) %*% t(eigens$vectors) x <- cov2cor(x) colnames(x) <- cnames rownames(x) <- rnames} return(x)}