1"set.cor" <-
2function(y,x,data,z=NULL,n.obs=NULL,use="pairwise")  {
3 #a function to extract subsets of variables (a and b) from a correlation matrix m or data set m
4  #and find the multiple correlation beta weights + R2 of the a set predicting the b set
5  #seriously rewritten, March 24, 2009 to make much simpler
6  #minor additons, October, 20, 2009 to allow for print and summary function
7  #major addition in April, 2011 to allow for set correlation
8  #added calculation of residual matrix December 30, 2011
9   cl <- match.call()
10
11  if(!is.matrix(data)) data <- as.matrix(data)
12  if(dim(data)[1]!=dim(data)[2]) {n.obs=dim(data)[1]   #this does not take into account missing data
13                    C <- cov(data,use=use)
14                    m <- cov2cor(C)
15                    raw <- TRUE}  else {
16                    raw <- FALSE
17                    C <-data
18                    m <- cov2cor(C)}
19
20
21        nm <- dim(data)[1]
22        xy <- c(x,y)
23        numx <- length(x)
24     	numy <- length(y)
25     	numz <- 0
26        nxy <- numx+numy
27        m.matrix <- m[c(x,y),c(x,y)]
28     	x.matrix <- m[x,x,drop=FALSE]
29     	xc.matrix <- C[x,x,drop=FALSE]
30     	xy.matrix <- m[x,y,drop=FALSE]
31     	xyc.matrix <- C[x,y,drop=FALSE]
32     	y.matrix <- m[y,y,drop=FALSE]
33     	if(!is.null(z)){numz <- length(z)
34     	                zm <- m[z,z,drop=FALSE]
35     	                 za <- m[x,z,drop=FALSE]
36     	                 zb <- m[y,z,drop=FALSE]
37     	                 x.matrix <- x.matrix - za %*% solve(zm) %*% t(za)
38     	                 y.matrix <- y.matrix - zb %*% solve(zm) %*% t(zb)
39     	                xy.matrix <- xy.matrix - za  %*% solve(zm) %*% t(zb)
40     	                m.matrix <- cbind(rbind(y.matrix,xy.matrix),rbind(t(xy.matrix),x.matrix))
41
42     	                 }
43     	if(numx == 1 ) {beta <- matrix(xy.matrix,nrow=1)
44     	                } else   #this is the case of a single x
45      				 { beta <- solve(x.matrix,xy.matrix)       #solve the equation bY~aX
46       					 beta <- as.matrix(beta) }
47
48       	yhat <- t(xy.matrix) %*% solve(x.matrix) %*% (xy.matrix)
49       	resid <- y.matrix - yhat
50     	if (numy >1 ) { if(is.null(rownames(beta))) {rownames(beta) <- colnames(m)[x]}
51     	                if(is.null(colnames(beta))) {colnames(beta) <- colnames(m)[y]}
52
53     	 R2 <- colSums(beta * xy.matrix) } else { colnames(beta) <- colnames(data)[y]
54     		 R2 <- sum(beta * xy.matrix)
55     	 	names(beta) <- colnames(data)[x]
56     		 names(R2) <- colnames(data)[y]}
57     	if(numy < 2) {Rset <- 1 - det(m.matrix)/(det(x.matrix) )
58     	             Myx <- solve(x.matrix) %*% xy.matrix  %*% t(xy.matrix)
59     	             cc2 <- cc <- T <- NULL} else {if (numx < 2) {Rset <- 1 - det(m.matrix)/(det(y.matrix) )
60     	            Myx <-  xy.matrix %*% solve(y.matrix) %*% t(xy.matrix)
61     	            cc2 <- cc <- T <- NULL} else {Rset <- 1 - det(m.matrix)/(det(x.matrix) * det(y.matrix))
62     	            if(numy > numx) {
63     	            Myx <- solve(x.matrix) %*% xy.matrix %*% solve(y.matrix) %*% t(xy.matrix)} else { Myx <- solve(y.matrix) %*% t(xy.matrix )%*% solve(x.matrix) %*% (xy.matrix)}
64     	           }
65     	            cc2 <- eigen(Myx)$values
66     	            cc <- sqrt(cc2)
67     	            T <- sum(cc2)/length(cc2)
68     	            }
69
70     	if(!is.null(n.obs)) {k<- length(x)
71
72     	                     uniq <- (1-smc(x.matrix))
73     	                     se.beta <- list()
74     	                     for (i in 1:length(y)) {
75     	                     df <- n.obs-k-1
76     	                     se.beta[[i]] <- (sqrt((1-R2[i])/(df))*sqrt(1/uniq))}
77     	                     se <- matrix(unlist(se.beta),ncol=length(y))
78     	                     colnames(se) <- colnames(beta)
79     	                     rownames(se) <- rownames(beta)
80     	                     tvalue <- beta/se
81     	                     se <- t(t(se) * sqrt(diag(C)[y]))/sqrt(diag(xc.matrix))
82
83     	                prob <- 2*(1- pt(abs(tvalue),df))
84     	                     SE2 <- 4*R2*(1-R2)^2*(df^2)/((n.obs^2-1)*(n.obs+3))
85     	                     SE =sqrt(SE2)
86     	                     F <- R2*df/(k*(1-R2))
87     	                     pF <- 1 - pf(F,k,df)
88     	                     shrunkenR2 <- 1-(1-R2)*(n.obs-1)/df
89
90     	               #find the shrunken R2 for set cor  (taken from CCAW p 615)
91     	                     u <- numx * numy
92     	                     m1 <- n.obs - max(numy ,(numx+numz)) - (numx + numy +3)/2
93
94     	                     s <- sqrt((numx ^2 * numy^2 -4)/(numx^2 + numy^2-5))
95     	                     if(numx*numy ==4) s <- 1
96
97     	                     v <- m1 * s + 1 - u/2
98     	                     R2set.shrunk <- 1 - (1-Rset) * ((v+u)/v)^s
99     	                     L <- 1-Rset
100     	                     L1s <- L^(-1/s)
101     	                     Rset.F <- (L1s-1)*(v/u)
102     	                     df.m <- n.obs -  max(numy ,(numx+numz))  -(numx + numy +3)/2
103     	                      s1 <- sqrt((numx ^2 * numy^2 -4)/(numx^2 + numy^2-5)) #see cohen p 321
104     	                     if(numx^2*numy^2 < 5) s1 <- 1
105     	                     df.v <- df.m * s1 + 1 - numx * numy/2 #which is just v
106     	                    # df.v <- (u+v)  #adjusted for bias to match the CCAW results
107     	                    #Rset.F <- Rset.F * (u+v)/v
108
109     	                    Chisq <- -(n.obs - 1 -(numx + numy +1)/2)*log((1-cc2))
110
111     	                              }
112
113     	if(numx == 1) {beta <-  beta * sqrt(diag(C)[y])
114     	   } else {beta <-  t(t(beta) * sqrt(diag(C)[y]))/sqrt(diag(xc.matrix))} #this puts the betas into the raw units
115
116     	if(is.null(n.obs)) {set.cor <- list(beta=beta,R=sqrt(R2),R2=R2,Rset=Rset,T=T,cancor = cc, cancor2=cc2,raw=raw,residual=resid,Call = cl)} else {
117     	              set.cor <- list(beta=beta,se=se,t=tvalue,Probability = prob,R=sqrt(R2),R2=R2,shrunkenR2 = shrunkenR2,seR2 = SE,F=F,probF=pF,df=c(k,df),Rset=Rset,Rset.shrunk=R2set.shrunk,Rset.F=Rset.F,Rsetu=u,Rsetv=df.v,T=T,cancor=cc,cancor2 = cc2,Chisq = Chisq,raw=raw,residual=resid,Call = cl)}
118     	class(set.cor) <- c("psych","set.cor")
119     	return(set.cor)
120     	}
121#modified July 12,2007 to allow for NA in the overall matrix
122#modified July 9, 2008 to give statistical tests
123#modified yet again August 15 , 2008 to convert covariances to correlations
124#modified January 3, 2011 to work in the case of a single predictor
125#modified April 25, 2011 to add the set correlation (from Cohen)
126