#adapted from John Fox's polychor (in the polycor package)
#these functions do all the work

#the following two functions are called repeatedly by tetrac and are defined here to speed up the process

"tetraBinBvn" <-
 function(rho,rc,cc) {    #adapted from John Fox's polychor
    row.cuts <- c(-Inf, rc, Inf)
    col.cuts <- c(-Inf, cc, Inf)
    P <- matrix(0, 2, 2)
    R <- matrix(c(1, rho, rho, 1), 2, 2)
    for (i in 1:2) {
        for (j in 1:2) {
            P[i, j] <- pmvnorm(lower = c(row.cuts[i], col.cuts[j]),
                upper = c(row.cuts[i + 1], col.cuts[j + 1]),
                corr = R)
        }}
    P   #the 2 x 2 table of cell probabilities implied by rho, rc, and cc
}
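
#illustrative sketch (not part of the original source): with mvtnorm loaded,
#tetraBinBvn returns the four cell probabilities of a standard bivariate normal
#cut at the row threshold rc and the column threshold cc.  For example,
#  tetraBinBvn(rho = .5, rc = 0, cc = 0)
#gives a 2 x 2 matrix with diagonal cells of about .333 and off diagonal cells
#of about .167; the four cells always sum to 1.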

"tetraF" <-
 function(rho,cc,rc,tab) {
    P <- tetraBinBvn(rho, cc, rc)   #cc is the x (row) threshold, rc is the y (column) threshold
    -sum(tab * log(P)) }  #the ML criterion (negative log likelihood) to be minimized over rho
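
#illustrative sketch (not part of the original source): tetraF is smallest near
#the maximum likelihood value of rho.  For a symmetric table such as
#  tab <- matrix(c(.4,.1,.1,.4),2,2)
#tetraF(.8,0,0,tab) is less than tetraF(0,0,0,tab), reflecting the strong
#positive association in the table; optimize searches (-1,1) for the minimum.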

"tetrac" <-
function(x,y=NULL,taux,tauy,i,j,correct=TRUE,global=TRUE) {

 if(is.null(y)) {tab <- x} else {tab <- table(x,y)}   #x may already be a 2 x 2 table
 if(length(tab) < 4) {warning("For i = ", i," j = ",j, "  no variance for either i or j, rho set to NA")
            result <- list(rho=NA,tau=c(NA,NA),objective=NA)
              }  else {

 if((sum(tab) > 1) && (min(tab) == 0) && correct) {
    warning("For i = ", i," j = ",j, "  A cell entry of 0 was replaced with .5.  Check your data!")
    tab[tab==0] <- .5  #correction for continuity
    }

  if(global) {cc <- taux            #use the thresholds estimated from the full data
              rc <- tauy } else {
  tot <- sum(tab)                   #otherwise, find the thresholds from the marginals of this table
  tab <- tab/tot
  rc <- qnorm(colSums(tab))[1]      #the y (column) threshold
  cc <- qnorm(rowSums(tab))[1]      #the x (row) threshold
  }
  rho <- optimize(tetraF,interval=c(-1,1),rc=rc,cc=cc,tab=tab)
  result <- list(rho=rho$minimum,tau=c(cc,rc),objective=rho$objective) }
  return(result)
  }
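
#illustrative sketch (not part of the original source): applied to a single
#2 x 2 table of frequencies (with mvtnorm loaded), tetrac finds rho by
#minimizing tetraF, e.g.
#  tetrac(matrix(c(40,10,10,40),2,2),i=1,j=1,global=FALSE)
#returns a list with rho of about .8, the two thresholds (both near 0 for this
#symmetric table), and the value of the objective function at the minimum.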

 #repeatedly do the analysis to form a matrix of output
"tetra.mat" <-
function(x,y=NULL,correct=TRUE,smooth=TRUE,global=TRUE) {nvar <- dim(x)[2]
x <- x - min(x,na.rm=TRUE) #in case the items are not scored 0,1
n.obs <- dim(x)[1]
if(is.null(y)) {
if(max(x,na.rm=TRUE) > 1) {stop("Tetrachoric correlations require dichotomous data")}
tau <- -qnorm(colMeans(x,na.rm=TRUE))   #the thresholds, estimated from the item means

mat <- matrix(0,nvar,nvar)
colnames(mat) <- rownames(mat) <- colnames(x)
names(tau) <- colnames(x)
#cat("\nFinding the tetrachoric correlations\n")
for (i in 2:nvar) {
progressBar(i^2/2,nvar^2/2,"Tetrachoric")
  for (j in 1:(i-1)) {
  if(t(!is.na(x[,i])) %*% (!is.na(x[,j]))  > 2 ) {   #require at least 3 pairwise complete observations
  tetra <-  tetrac(x[,i],x[,j],tau[i],tau[j],i,j,correct=correct,global=global)
   mat[i,j] <- mat[j,i] <- tetra$rho} else {mat[i,j] <- mat[j,i] <- NA}
   }
   }
   diag(mat) <- 1
   if(any(is.na(mat))) {warning("some correlations are missing, smoothing turned off")
                        smooth <- FALSE}

  if(smooth) {mat <- cor.smooth(mat) }  #force the matrix to be positive definite
  result <- list(rho = mat,tau = tau,n.obs=n.obs) } else {

      # the case of having a y variable
      y <- y - min(y,na.rm=TRUE) #in case the values are not 0,1
      if(is.matrix(y)) {ny <- dim(y)[2]
           tauy <- -qnorm(colMeans(y,na.rm=TRUE))
           n.obs.y <- dim(y)[1]
             } else {                              #y is a single variable
             	ny <- 1
           	n.obs.y <- length(y)
           	tauy <- -qnorm(mean(y,na.rm=TRUE)) }   #the threshold for the single y
      y <- as.matrix(y)

      if(dim(x)[1] != n.obs.y)  {stop("x and y must have the same number of observations")}
      taux <- -qnorm(colMeans(x,na.rm=TRUE))

      nx <- dim(x)[2]

      mat <- matrix(0,nx,ny)
      colnames(mat) <- colnames(y)
      rownames(mat) <- colnames(x)
      for (i in 1:nx) {
        for (j in 1:ny) {tetra <- tetrac(x[,i],y[,j],taux[i],tauy[j],i,j,correct=correct,global=global)
        mat[i,j] <- tetra$rho }
         }

    result <- list(rho = mat,tau = taux,tauy = tauy,n.obs=n.obs)
     }
 flush(stdout())
  cat("\n" )  #put in to clear the progress bar
  return(result)
  }

 #convert comorbidity type data (the base rates q1 and q2 and the probability p of their joint occurrence) to a 2 x 2 table
 pqr <- function(q1,q2=NULL,p=NULL) {
    if(length(q1) > 1) {       #the three values may also be given as a single vector
       q2 <- q1[2]
       p <- q1[3]
       q1 <- q1[1]}
   tab <- matrix(0,2,2)
   tab[1,1] <- p
   tab[2,1] <- q1-p
   tab[1,2] <- q2-p
   tab[2,2] <- 1-q1 - tab[1,2]
   return(tab)}
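
#illustrative sketch (not part of the original source): if 20% of cases have
#disorder 1, 15% have disorder 2, and 10% have both, then
#  pqr(c(.2,.15,.1))
#returns the 2 x 2 table of joint proportions (.10 and .05 in the first row,
#.10 and .75 in the second), which may be passed to tetrachoric to estimate
#the tetrachoric correlation implied by these comorbidity rates.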

 #the public function
 "tetrachoric" <-
 function(x,y=NULL,correct=TRUE,smooth=TRUE,global=TRUE) {

 if(!require(mvtnorm)) {stop("I am sorry, you must have mvtnorm installed to use tetrachoric")}
 cl <- match.call()
 if (!is.matrix(x) && !is.data.frame(x)) {
  if (length(x) == 4) {x <- matrix(x,2,2) } else {
  if (length(x) == 3) {x <- pqr(x) } else {
  stop("Data must be a vector of length 3 (q1, q2, p), a vector of length 4, a 2 x 2 matrix, or a data.frame/matrix of data")}
  }}
  nvar <- dim(x)[2]
  n.obs <- dim(x)[1]
 if (n.obs == nvar) {result <- tetrac(x,correct=correct,i=1,j=1,global=FALSE)} else {  #a single 2 x 2 table was given
 result <- tetra.mat(x,y=y,correct=correct,smooth=smooth,global=global)}

 result$Call <- cl
 class(result) <- c("psych","tetra")
 return(result)
 }
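
#illustrative sketch (not part of the original source): tetrachoric accepts
#either a data.frame/matrix of dichotomous items or a single fourfold table,
#e.g. (with mvtnorm installed)
#  tetrachoric(matrix(c(44,19,33,91),2,2))    #one 2 x 2 table of frequencies
#  tetrachoric(c(.2,.15,.1))                  #comorbidity rates q1, q2, p
#  #tetrachoric(item.data)                    #item.data is a hypothetical 0/1 data matrix
#each call returns a list with the correlation(s) rho and the threshold(s) tau.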

 "tetrachor" <-     #the same as tetrachoric, but without the y, smooth, and global options
 function(x,correct=TRUE) {
 if(!require(mvtnorm)) {stop("I am sorry, you must have mvtnorm installed to use tetrachor")}
 cl <- match.call()
 if (!is.matrix(x) && !is.data.frame(x)) {
  if (length(x) == 4) {x <- matrix(x,2,2) } else {
  if (length(x) == 3) {x <- pqr(x) } else {
  stop("Data must be a vector of length 3 (q1, q2, p), a vector of length 4, a 2 x 2 matrix, or a data.frame/matrix of data")}
  }}
  nvar <- dim(x)[2]
 if (dim(x)[1] == nvar) {result <- tetrac(x,correct=correct,i=1,j=1,global=FALSE)} else {
 result <- tetra.mat(x,correct=correct)}

 result$Call <- cl
 class(result) <- c("psych","tetra")
 return(result)
 }

  #does the work for the biserial correlations
"biserialc" <-
function(x,y,i,j) {
cc <- complete.cases(x,y)
x <- x[cc]
y <- y[cc]
yf <- as.factor(y)
lev <- levels(yf)
if(length(lev)!=2) {#stop("y is not a dichotomous variable")
                   warning("For x = ",i, " y = ", j, " y is not dichotomous")
                    r <- NA} else {
ty <- table(y)
 tot <- sum(ty)
 tab <- ty/tot
 if(length(tab) < 2) {r <- NA
                    warning("For x = ",i, " y = ", j, " no variance for y,  r set to NA")} else { #this treats the case of no variance in the dichotomous variable
 zp <- dnorm(qnorm(tab[2]))   #the height of the normal curve at the threshold
 hi <- mean(x[y==lev[2]],na.rm=TRUE)
 lo <- mean(x[y==lev[1]],na.rm=TRUE)
# r <- (hi - lo)*sqrt(prod(tab))/(sd(x,na.rm=TRUE))  #point biserial
 r <- (hi - lo)*(prod(tab))/(zp * sd(x,na.rm=TRUE))   #the biserial correlation
 if(!is.na(r) && r > 1) { r <- 1   #in case we are correlating a dichotomous variable with itself
          warning("For x = ",i, " y = ", j, " x seems to be dichotomous, not continuous")
  }}}
 return(r)
}

"biserial" <-
function(x,y) {
x <- as.matrix(x)
y <- as.matrix(y)
nx <- dim(x)[2]
ny <- dim(y)[2]
if(is.null(nx)) nx <- 1
if(is.null(ny)) ny <- 1
mat <- matrix(NA,nrow=ny,ncol=nx)
colnames(mat) <- colnames(x)
rownames(mat) <- colnames(y)
#cat("\n Finding the biserial correlations\n")
for(i in 1:ny) {
progressBar(i*(i-1)/2,ny^2/2,"Biserial")
   for (j in 1:nx) {
    mat[i,j] <- biserialc(x[,j],y[,i],j,i)
    }}
  flush(stdout())
  cat("\n" )  #put in to clear the progress bar
   return(mat)
}
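
#illustrative sketch (not part of the original source, and assuming the rest of
#the psych functions such as progressBar are available): biserial correlates
#continuous x variables with dichotomous y variables, e.g.
#  set.seed(42)                         #hypothetical data for illustration
#  cont <- rnorm(500)
#  dich <- as.numeric(cont + rnorm(500) > 0)
#  biserial(cont,dich)                  #a value near .7 is expected here
#rows of the result correspond to the y (dichotomous) variables, columns to x.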


"polyserial" <-
function(x,y) {
#x is continuous, y is polytomous
   y <- as.matrix(y)     #allow y to be given as a single variable
   min.item <- min(y, na.rm = TRUE)
   max.item <- max(y, na.rm = TRUE)
 if(is.null(ncol(y)))  {n.var <- 1
                        n.cases <- length(y)
                } else {n.var <- ncol(y)
        n.cases <- dim(y)[1]}
        dummy <- matrix(rep(min.item:max.item, n.var), ncol = n.var)   #make sure every response category appears for every item
        colnames(dummy) <- colnames(y)
        xdum <- rbind(y, dummy)
        frequency <- apply(xdum, 2, table)
        frequency <- t(frequency - 1)     #remove the dummy responses
        responses <- rowSums(frequency)
        frequency <- frequency/responses
        frequency <- t(apply(frequency,1,cumsum))   #the cumulative proportions
        len <- dim(frequency)[2]
        tau <- dnorm(qnorm(frequency[,-len,drop=FALSE]))   #the normal density at each threshold
        stau <- rowSums(tau)

    rxy <- cor(x,y,use="pairwise")
    sdy <- apply(y,2,sd,na.rm=TRUE)
    rps <- t(rxy) * sqrt((n.cases-1)/n.cases) * sdy/stau   #the polyserial approximation: rescale the Pearson r by sd(y) and the sum of the densities at the thresholds
    rps[rps > 1.0] <- 1.0
    rps[rps < -1.0] <- -1.0
 return(rps)
 }
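
#illustrative sketch (not part of the original source): polyserial correlates
#continuous x variables with ordered polytomous y variables, e.g.
#  set.seed(17)                           #hypothetical data for illustration
#  latent <- rnorm(500)
#  x <- latent + rnorm(500)
#  y <- cut(latent + rnorm(500), breaks=c(-Inf,-1,0,1,Inf), labels=FALSE)
#  polyserial(x,y)                        #a value near .5 is expected here
#the result has one row per y variable and one column per x variable.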


"cor.smooth" <- function(x) {   #force a correlation matrix to be positive definite
eigens <- eigen(x)
if(min(eigens$values) < .Machine$double.eps)  {warning("Matrix was not positive definite, smoothing was done")
eigens$values[eigens$values  < .Machine$double.eps] <- 100 * .Machine$double.eps   #replace the offending eigenvalues with a small positive value
nvar <- dim(x)[1]
tot <- sum(eigens$values)
eigens$values <- eigens$values * nvar/tot   #rescale so the eigenvalues sum to nvar
cnames <- colnames(x)
rnames <- rownames(x)
x <- eigens$vectors %*% diag(eigens$values) %*% t(eigens$vectors)   #reconstruct the matrix
x <- cov2cor(x)   #and rescale it to be a correlation matrix
colnames(x) <- cnames
rownames(x) <- rnames}
return(x)}
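
#illustrative sketch (not part of the original source): a matrix that is not
#positive definite, such as
#  bad <- matrix(c(1,.9,.2, .9,1,.9, .2,.9,1),3,3)
#has a negative smallest eigenvalue; cor.smooth(bad) issues a warning, bumps
#the offending eigenvalue to a small positive value, and returns a nearby
#correlation matrix with a unit diagonal.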