1#adapted from John Fox's Polychor 2#this does all the work 3 4 #the following two functions are called repeatedly by tetrac and are put here to speed up the process 5 6"tetraBinBvn" <- 7 function (rho,rc,cc) #adapted from John Fox's polychor 8{ row.cuts <- c(-Inf, rc, Inf) 9 col.cuts <- c(-Inf, cc, Inf) 10 P <- matrix(0, 2,2) 11 R <- matrix(c(1, rho, rho, 1), 2, 2) 12 for (i in 1:2) { 13 for (j in 1:2) { 14 P[i, j] <- pmvnorm(lower = c(row.cuts[i], col.cuts[j]), 15 upper = c(row.cuts[i + 1], col.cuts[j + 1]), 16 corr = R) 17 }} 18 P #the estimated 2 x 2 predicted by rho, rc, cc 19} 20 21"tetraF" <- 22 function(rho,cc,rc,tab) { 23 P <- tetraBinBvn(rho, cc, rc) 24 -sum(tab * log(P)) } #the ML criterion to be minimized 25 26"tetrac" <- 27function(x,y=NULL,taux,tauy,i,j,correct=TRUE,global=TRUE) { 28 29 30 if(is.null(y)) {tab <- x} else {tab <- table(x,y)} 31 if(length(tab) < 4) {warning("For i = ", i," j = ",j, " No variance for either i or j rho set to NA") 32 33 result <- list(rho=NA,tau=c(NA,NA),objective=NA) 34 } else { 35 36 if((sum(tab) > 1) && (min(tab) == 0) && correct) { 37 warning("For i = ", i," j = ",j, " A cell entry of 0 was replaced with .5. Check your data!") 38 tab[tab==0] <-.5 #correction for continuity 39 40 } 41 42 if(global) {cc <- taux 43 rc <- tauy } else { 44 tot <- sum(tab) 45 tab <- tab/tot 46 rc <- qnorm(colSums(tab))[1] 47 cc <- qnorm(rowSums(tab))[1] 48 } 49 rho <- optimize(tetraF,interval=c(-1,1),rc=rc,cc=cc,tab=tab) 50 result <- list(rho=rho$minimum,tau=c(cc,rc),objective=rho$objective) } 51 return(result) 52 } 53 54 #repeatedly do the analysis to form a matrix of output 55"tetra.mat" <- 56function(x,y=NULL,correct=TRUE,smooth=TRUE,global=TRUE) {nvar <- dim(x)[2] 57x <- x -min(x,na.rm=TRUE) #in case the numbers are not 0,1 58n.obs <- dim(x)[1] 59if(is.null(y)) { 60if(max(x,na.rm=TRUE) > 1) {stop("Tetrachoric correlations require dictomous data")} 61tau <- -qnorm(colMeans(x,na.rm=TRUE)) 62 63mat <- matrix(0,nvar,nvar) 64colnames(mat) <- rownames(mat) <- colnames(x) 65names(tau) <- colnames(x) 66#cat("\nFinding the tetrachoric correlations\n") 67for (i in 2:nvar) { 68progressBar(i^2/2,nvar^2/2,"Tetrachoric") 69 for (j in 1:(i-1)) { 70 if(t(!is.na(x[,i]))%*% (!is.na(x[,j])) > 2 ) { 71 tetra <- tetrac(x[,i],x[,j],tau[i],tau[j],i,j,correct=correct,global=global) 72 mat[i,j] <- mat[j,i] <- tetra$rho} else {mat[i,j] <- mat[j,i] <- NA} 73 } 74 } 75 diag(mat) <- 1 76 if(any(is.na(mat))) {warning("some correlations are missing, smoothing turned off") 77 smooth <- FALSE} 78 79 if(smooth) {mat <- cor.smooth(mat) } #makes it positive semidefinite 80 result <- list(rho = mat,tau = tau,n.obs=n.obs) } else { 81 82 # the case of having a y variable 83 y <- y -min(y,na.rm=TRUE) #in case the numbers are not 0,1 84 if(is.matrix(y)) {ny <- dim(y)[2] 85 tauy <- -qnorm(colMeans(y,na.rm=TRUE)) 86 n.obs.y <- dim(y)[1] 87 } else { 88 ny <- 1 89 n.obs.y <- length(y)} 90 tauy <- -qnorm(mean(y,na.rm=TRUE)) 91 y <- as.matrix(y) 92 93 if(dim(x)[1] != n.obs.y) {stop("x and y must have the same number of observations")} 94 taux <- -qnorm(colMeans(x,na.rm=TRUE)) 95 96 nx <- dim(x)[2] 97 98 mat <- matrix(0,nx,ny) 99 colnames(mat) <- colnames(y) 100 rownames(mat) <- colnames(x) 101 for (i in 1:nx) { 102 for (j in 1:ny) {tetra <- tetrac(x[,i],y[,j],taux[i],tauy[j],correct=correct) 103 mat[i,j] <- tetra$rho } 104 } 105 106 result <- list(rho = mat,tau = taux,tauy= tauy,n.obs=n.obs) 107 } 108 flush(stdout()) 109 cat("\n" ) #put in to clear the progress bar 110 return(result) 111 } 112 113 #convert comorbidity type numbers to a table 114 pqr <- function(q1,q2=NULL,p=NULL) { 115 if(length(q1) > 1) { 116 q2 <- q1[2] 117 p <- q1[3] 118 q1 <- q1[1]} 119 tab <- matrix(0,2,2) 120 tab[1,1] <- p 121 tab[2,1] <- q1-p 122 tab[1,2] <- q2-p 123 tab[2,2] <- 1-q1 - tab[1,2] 124 return(tab)} 125 126 #the public function 127 "tetrachoric" <- 128 function(x,y=NULL,correct=TRUE,smooth=TRUE,global=TRUE) { 129 130 131 132 133 if(!require(mvtnorm)) {stop("I am sorry, you must have mvtnorm installed to use tetrachoric")} 134 cl <- match.call() 135 if (!is.matrix(x) && !is.data.frame(x)) { 136 if (length(x) ==4) {x <- matrix(x,2,2) } else { 137 if(length(x) ==3 ) {x <- pqr(x) } else { 138 stop("Data must be either a 1 x 4 vector, a 2 x 2 matrix, or a data.frame/matrix of data")} 139 }} 140 nvar <- dim(x)[2] 141 n.obs <- dim(x)[1] 142 if (n.obs == nvar) {result <- tetrac(x,correct=correct,i=1,j=1,global=FALSE)} else { 143 result <- tetra.mat(x,y=y,correct=correct,smooth=smooth,global=global)} 144 145 result$Call <- cl 146 class(result) <- c("psych","tetra") 147 return(result) 148 } 149 150 "tetrachor" <- 151 function(x,correct=TRUE) { 152 if(!require(mvtnorm)) {stop("I am sorry, you must have mvtnorm installed to use tetrachor")} 153 cl <- match.call() 154 if (!is.matrix(x) && !is.data.frame(x)) { 155 if (length(x) ==4) {x <- matrix(x,2,2) } else { 156 if(length(x) ==3 ) {x <- pqr(x) } else { 157 stop("Data must be either a 1 x 4 vector, a 2 x 2 matrix, or a data.frame/matrix of data")} 158 }} 159 nvar <- dim(x)[2] 160 if (dim(x)[1] == nvar) {result <- tetrac(x,correct=correct)} else { 161 result <- tetra.mat(x,correct=correct)} 162 163 164 result$Call <- cl 165 class(result) <- c("psych","tetra") 166 return(result) 167 } 168 169 #does the work 170"biserialc" <- 171function(x,y,i,j) { 172cc <- complete.cases(x,y) 173x <- x[cc] 174y <- y[cc] 175yf <- as.factor(y) 176lev <- levels(yf) 177if(length(lev)!=2) {#stop("y is not a dichotomous variable") 178 warning("For x = ",i, " y = ", j, " y is not dichotomous") 179 r <- NA} else { 180ty <- table(y) 181 tot <- sum(ty) 182 tab <- ty/tot 183 if(length(tab) < 2) {r <- NA 184 warning("For x = ",i, " y = ", j, " no variance for y r set to NA")} else { #this treats the case of no variance in the dichotmous variable 185 zp <- dnorm(qnorm(tab[2])) 186 hi <- mean(x[y==lev[2]],na.rm=TRUE) 187 lo <- mean(x[y==lev[1]],na.ram=TRUE) 188# r <- (hi - lo)*sqrt(prod(tab))/(sd(x,na.rm=TRUE)) #point biserial 189 r <- (hi - lo)*(prod(tab))/(zp * sd(x,na.rm=TRUE)) 190 if(!is.na(r) && r >1) { r <- 1 #in case we are correlating a dichotomous variable with itself 191 warning("For x = ",i, " y = ", j, " x seems to be dichotomous, not continuous") 192 }}} 193 return(r) 194} 195 196"biserial" <- 197function(x,y) { 198x <- as.matrix(x) 199y <- as.matrix(y) 200nx <- dim(x)[2] 201ny <- dim(y)[2] 202if(is.null(nx)) nx <- 1 203if(is.null(ny)) ny <- 1 204mat <- matrix(NA,nrow=ny,ncol=nx) 205colnames(mat) <- colnames(x) 206rownames(mat) <- colnames(y) 207#cat("\n Finding the biserial correlations\n") 208for(i in 1:ny) { 209progressBar(i*(i-1)/2,ny^2/2,"Biserial") 210 for (j in 1:nx) { 211 mat[i,j] <- biserialc(x[,j],y[,i],j,i) 212 }} 213 flush(stdout()) 214 cat("\n" ) #put in to clear the progress bar 215 return(mat) 216} 217 218 219"polyserial" <- 220function(x,y) { 221# y <- matrix(y) 222 min.item <- min(y, na.rm = TRUE) 223 max.item <- max(y, na.rm = TRUE) 224 if(is.null(ncol(y))) {n.var <- 1 225 n.cases <- length(y) 226 } else {n.var <- ncol(y) 227 n.cases <- dim(y)[1]} 228 dummy <- matrix(rep(min.item:max.item, n.var), ncol = n.var) 229 colnames(dummy) <- names(y) 230 xdum <- rbind(y, dummy) 231 frequency <- apply(xdum, 2, table) 232 frequency <- t(frequency - 1) 233 responses <- rowSums(frequency) 234 frequency <- frequency/responses 235 frequency <- t(apply(frequency,1,cumsum)) 236 len <- dim(frequency)[2] 237 tau <- dnorm(qnorm(frequency[,-len,drop=FALSE])) 238 stau <- rowSums(tau) 239 240 rxy <- cor(x,y,use="pairwise") 241 sdy <- apply(y,2,sd,na.rm=TRUE) 242 rps <- t(rxy) * sqrt((n.cases-1)/n.cases) * sdy/stau 243 rps[rps > 1.0] <- 1.0 244 rps[rps < -1.0] <- -1.0 245 return(rps) 246 } 247 248 249 250 251 252 253 254"cor.smooth" <- function(x) { 255eigens <- eigen(x) 256if(min(eigens$values) < .Machine$double.eps) {warning("Matrix was not positive definite, smoothing was done") 257eigens$values[eigens$values < .Machine$double.eps] <- 100 * .Machine$double.eps 258nvar <- dim(x)[1] 259tot <- sum(eigens$values) 260eigens$values <- eigens$values * nvar/tot 261cnames <- colnames(x) 262rnames <- rownames(x) 263x <- eigens$vectors %*% diag(eigens$values) %*% t(eigens$vectors) 264x <- cov2cor(x) 265colnames(x) <- cnames 266rownames(x) <- rnames} 267return(x)} 268