1"set.cor" <- 2function(y,x,data,z=NULL,n.obs=NULL,use="pairwise") { 3 #a function to extract subsets of variables (a and b) from a correlation matrix m or data set m 4 #and find the multiple correlation beta weights + R2 of the a set predicting the b set 5 #seriously rewritten, March 24, 2009 to make much simpler 6 #minor additons, October, 20, 2009 to allow for print and summary function 7 #major addition in April, 2011 to allow for set correlation 8 #added calculation of residual matrix December 30, 2011 9 cl <- match.call() 10 11 if(!is.matrix(data)) data <- as.matrix(data) 12 if(dim(data)[1]!=dim(data)[2]) {n.obs=dim(data)[1] #this does not take into account missing data 13 C <- cov(data,use=use) 14 m <- cov2cor(C) 15 raw <- TRUE} else { 16 raw <- FALSE 17 C <-data 18 m <- cov2cor(C)} 19 20 21 nm <- dim(data)[1] 22 xy <- c(x,y) 23 numx <- length(x) 24 numy <- length(y) 25 numz <- 0 26 nxy <- numx+numy 27 m.matrix <- m[c(x,y),c(x,y)] 28 x.matrix <- m[x,x,drop=FALSE] 29 xc.matrix <- C[x,x,drop=FALSE] 30 xy.matrix <- m[x,y,drop=FALSE] 31 xyc.matrix <- C[x,y,drop=FALSE] 32 y.matrix <- m[y,y,drop=FALSE] 33 if(!is.null(z)){numz <- length(z) 34 zm <- m[z,z,drop=FALSE] 35 za <- m[x,z,drop=FALSE] 36 zb <- m[y,z,drop=FALSE] 37 x.matrix <- x.matrix - za %*% solve(zm) %*% t(za) 38 y.matrix <- y.matrix - zb %*% solve(zm) %*% t(zb) 39 xy.matrix <- xy.matrix - za %*% solve(zm) %*% t(zb) 40 m.matrix <- cbind(rbind(y.matrix,xy.matrix),rbind(t(xy.matrix),x.matrix)) 41 42 } 43 if(numx == 1 ) {beta <- matrix(xy.matrix,nrow=1) 44 } else #this is the case of a single x 45 { beta <- solve(x.matrix,xy.matrix) #solve the equation bY~aX 46 beta <- as.matrix(beta) } 47 48 yhat <- t(xy.matrix) %*% solve(x.matrix) %*% (xy.matrix) 49 resid <- y.matrix - yhat 50 if (numy >1 ) { if(is.null(rownames(beta))) {rownames(beta) <- colnames(m)[x]} 51 if(is.null(colnames(beta))) {colnames(beta) <- colnames(m)[y]} 52 53 R2 <- colSums(beta * xy.matrix) } else { colnames(beta) <- colnames(data)[y] 54 R2 <- sum(beta * xy.matrix) 55 names(beta) <- colnames(data)[x] 56 names(R2) <- colnames(data)[y]} 57 if(numy < 2) {Rset <- 1 - det(m.matrix)/(det(x.matrix) ) 58 Myx <- solve(x.matrix) %*% xy.matrix %*% t(xy.matrix) 59 cc2 <- cc <- T <- NULL} else {if (numx < 2) {Rset <- 1 - det(m.matrix)/(det(y.matrix) ) 60 Myx <- xy.matrix %*% solve(y.matrix) %*% t(xy.matrix) 61 cc2 <- cc <- T <- NULL} else {Rset <- 1 - det(m.matrix)/(det(x.matrix) * det(y.matrix)) 62 if(numy > numx) { 63 Myx <- solve(x.matrix) %*% xy.matrix %*% solve(y.matrix) %*% t(xy.matrix)} else { Myx <- solve(y.matrix) %*% t(xy.matrix )%*% solve(x.matrix) %*% (xy.matrix)} 64 } 65 cc2 <- eigen(Myx)$values 66 cc <- sqrt(cc2) 67 T <- sum(cc2)/length(cc2) 68 } 69 70 if(!is.null(n.obs)) {k<- length(x) 71 72 uniq <- (1-smc(x.matrix)) 73 se.beta <- list() 74 for (i in 1:length(y)) { 75 df <- n.obs-k-1 76 se.beta[[i]] <- (sqrt((1-R2[i])/(df))*sqrt(1/uniq))} 77 se <- matrix(unlist(se.beta),ncol=length(y)) 78 colnames(se) <- colnames(beta) 79 rownames(se) <- rownames(beta) 80 tvalue <- beta/se 81 se <- t(t(se) * sqrt(diag(C)[y]))/sqrt(diag(xc.matrix)) 82 83 prob <- 2*(1- pt(abs(tvalue),df)) 84 SE2 <- 4*R2*(1-R2)^2*(df^2)/((n.obs^2-1)*(n.obs+3)) 85 SE =sqrt(SE2) 86 F <- R2*df/(k*(1-R2)) 87 pF <- 1 - pf(F,k,df) 88 shrunkenR2 <- 1-(1-R2)*(n.obs-1)/df 89 90 #find the shrunken R2 for set cor (taken from CCAW p 615) 91 u <- numx * numy 92 m1 <- n.obs - max(numy ,(numx+numz)) - (numx + numy +3)/2 93 94 s <- sqrt((numx ^2 * numy^2 -4)/(numx^2 + numy^2-5)) 95 if(numx*numy ==4) s <- 1 96 97 v <- m1 * s + 1 - u/2 98 R2set.shrunk <- 1 - (1-Rset) * ((v+u)/v)^s 99 L <- 1-Rset 100 L1s <- L^(-1/s) 101 Rset.F <- (L1s-1)*(v/u) 102 df.m <- n.obs - max(numy ,(numx+numz)) -(numx + numy +3)/2 103 s1 <- sqrt((numx ^2 * numy^2 -4)/(numx^2 + numy^2-5)) #see cohen p 321 104 if(numx^2*numy^2 < 5) s1 <- 1 105 df.v <- df.m * s1 + 1 - numx * numy/2 #which is just v 106 # df.v <- (u+v) #adjusted for bias to match the CCAW results 107 #Rset.F <- Rset.F * (u+v)/v 108 109 Chisq <- -(n.obs - 1 -(numx + numy +1)/2)*log((1-cc2)) 110 111 } 112 113 if(numx == 1) {beta <- beta * sqrt(diag(C)[y]) 114 } else {beta <- t(t(beta) * sqrt(diag(C)[y]))/sqrt(diag(xc.matrix))} #this puts the betas into the raw units 115 116 if(is.null(n.obs)) {set.cor <- list(beta=beta,R=sqrt(R2),R2=R2,Rset=Rset,T=T,cancor = cc, cancor2=cc2,raw=raw,residual=resid,Call = cl)} else { 117 set.cor <- list(beta=beta,se=se,t=tvalue,Probability = prob,R=sqrt(R2),R2=R2,shrunkenR2 = shrunkenR2,seR2 = SE,F=F,probF=pF,df=c(k,df),Rset=Rset,Rset.shrunk=R2set.shrunk,Rset.F=Rset.F,Rsetu=u,Rsetv=df.v,T=T,cancor=cc,cancor2 = cc2,Chisq = Chisq,raw=raw,residual=resid,Call = cl)} 118 class(set.cor) <- c("psych","set.cor") 119 return(set.cor) 120 } 121#modified July 12,2007 to allow for NA in the overall matrix 122#modified July 9, 2008 to give statistical tests 123#modified yet again August 15 , 2008 to convert covariances to correlations 124#modified January 3, 2011 to work in the case of a single predictor 125#modified April 25, 2011 to add the set correlation (from Cohen) 126