1##S function somers2 2## 3## Calculates concordance probability and Somers' Dxy rank correlation 4## between a variable X (for which ties are counted) and a binary 5## variable Y (having values 0 and 1, for which ties are not counted). 6## Uses short cut method based on average ranks in two groups. 7## 8## Usage: 9## 10## somers2(x, y, weights) 11## 12## Returns vector whose elements are C Index, Dxy, n and missing, where 13## C Index is the concordance probability and Dxy=2(C Index-.5). 14## 15## F. Harrell 28 Nov 90 6 Apr 98: added weights 16 17somers2 <- function(x, y, weights=NULL, normwt=FALSE, na.rm=TRUE) 18{ 19 if(length(y)!=length(x))stop("y must have same length as x") 20 y <- as.integer(y) 21 wtpres <- length(weights) 22 if(wtpres && (wtpres != length(x))) 23 stop('weights must have same length as x') 24 25 if(na.rm) 26 { 27 miss <- if(wtpres) is.na(x + y + weights) 28 else is.na(x + y) 29 30 nmiss <- sum(miss) 31 if(nmiss>0) 32 { 33 miss <- !miss 34 x <- x[miss] 35 y <- y[miss] 36 if(wtpres) weights <- weights[miss] 37 } 38 } 39 else nmiss <- 0 40 41 if(any(y %nin% 0:1)) stop('y must be binary') 42 43 if(wtpres) 44 { 45 if(normwt) 46 weights <- length(x)*weights/sum(weights) 47 n <- sum(weights) 48 } 49 else n <- length(x) 50 51 if(n<2) stop("must have >=2 non-missing observations") 52 53 n1 <- if(wtpres)sum(weights[y==1]) else sum(y==1) 54 55 if(n1==0 || n1==n) 56 return(c(C=NA,Dxy=NA,n=n,Missing=nmiss)) 57 58 mean.rank <- 59 if(wtpres) 60 wtd.mean(wtd.rank(x, weights, na.rm=FALSE), weights*y) 61 else 62 mean(rank(x)[y==1]) 63 64 c.index <- (mean.rank - (n1+1)/2)/(n-n1) 65 dxy <- 2*(c.index-.5) 66 r <- c(c.index, dxy, n, nmiss) 67 names(r) <- c("C","Dxy","n","Missing") 68 r 69} 70 71 72if(FALSE) rcorrs <- function(x, y, weights=rep(1,length(y)), 73 method=c('exact','bin'), nbin=1000, 74 na.rm=TRUE) 75{ 76 ## Experimental function - probably don't need 77 78 method <- match.arg(method) 79 80 if(na.rm) { 81 s <- !is.na(x + oldUnclass(y) + weights) 82 x <- x[s]; y <- y[s]; weights <- weights[s] 83 } 84 85 n <- length(x) 86 if(missing(method)) 87 method <- if(n < 1000) 'exact' 88 else 'bin' 89 90 y <- as.category(y); 91 nly <- length(levels(y)) 92 if(method=='bin') { 93 r <- range(x); d <- r[2] - r[1] 94 x <- 1 + trunc((nbin-1)*(x - r[1])/d) 95 96 xy <- y*nbin + x 97 98 ## Code below is lifted from rowsum() 99 storage.mode(weights) <- "double" 100 temp <- 101 if(.R.) 102 .C('R_rowsum', dd=as.integer(dd), 103 as.double(max(1,weights)*n), 104 x=weights, as.double(xy), PACKAGE='base') 105 else 106 .C("S_rowsum", 107 dd = as.integer(c(n,1)), 108 as.double(max(1,weights)*n), 109 x = weights, 110 as.double(xy)) ## 3Jun01 111 112 new.n <- temp$dd[1] 113 weights <- temp$x[1:new.n] 114 115 uxy <- unique(xy) 116 x <- uxy %% nbin 117 y <- (uxy - x)/nbin 118 n <- length(x) 119 } 120 121 list(x=x, y=y, weights=weights) 122 123 #storage.mode(x) <- "single" 124 #storage.mode(y) <- "single" 125 #storage.mode(event) <- "logical" 126 127 ## wcidxy doesn't exist yet 128 z <- .Fortran("wcidxy",as.single(x),as.single(y),as.integer(weights),as.integer(n), 129 nrel=double(1),nconc=double(1),nuncert=double(1), 130 c.index=double(1),gamma=double(1),sd=double(1),as.logical(outx)) 131 r <- c(z$c.index,z$gamma,z$sd,n,z$nrel,z$nconc,z$nuncert) 132 names(r) <- c("C Index","Dxy","S.D.","n","missing","uncensored", 133 "Relevant Pairs", "Concordant","Uncertain") 134 r 135} 136