1##S function somers2
2##
3##    Calculates concordance probability and Somers'  Dxy  rank  correlation
4##    between  a  variable  X  (for  which  ties are counted) and a binary
5##    variable Y (having values 0 and 1, for which ties are not  counted).
6##    Uses short cut method based on average ranks in two groups.
7##
8##    Usage:
9##
10##         somers2(x, y, weights)
11##
12##    Returns vector whose elements are C Index, Dxy, n and missing, where
13##    C Index is the concordance probability and Dxy=2(C Index-.5).
14##
15##    F. Harrell 28 Nov 90     6 Apr 98: added weights
16
17somers2 <- function(x, y, weights=NULL, normwt=FALSE, na.rm=TRUE)
18{
19  if(length(y)!=length(x))stop("y must have same length as x")
20  y <- as.integer(y)
21  wtpres <- length(weights)
22  if(wtpres && (wtpres != length(x)))
23    stop('weights must have same length as x')
24
25  if(na.rm)
26    {
27      miss <- if(wtpres) is.na(x + y + weights)
28      else is.na(x + y)
29
30      nmiss <- sum(miss)
31      if(nmiss>0)
32        {
33          miss <- !miss
34          x <- x[miss]
35          y <- y[miss]
36          if(wtpres) weights <- weights[miss]
37        }
38    }
39  else nmiss <- 0
40
41  if(any(y %nin% 0:1)) stop('y must be binary')
42
43  if(wtpres)
44    {
45      if(normwt)
46        weights <- length(x)*weights/sum(weights)
47      n <- sum(weights)
48    }
49  else n <- length(x)
50
51  if(n<2) stop("must have >=2 non-missing observations")
52
53  n1 <- if(wtpres)sum(weights[y==1]) else sum(y==1)
54
55  if(n1==0 || n1==n)
56    return(c(C=NA,Dxy=NA,n=n,Missing=nmiss))
57
58  mean.rank <-
59    if(wtpres)
60      wtd.mean(wtd.rank(x, weights, na.rm=FALSE), weights*y)
61    else
62      mean(rank(x)[y==1])
63
64  c.index <- (mean.rank - (n1+1)/2)/(n-n1)
65  dxy <- 2*(c.index-.5)
66  r <- c(c.index, dxy, n, nmiss)
67  names(r) <- c("C","Dxy","n","Missing")
68  r
69}
70
71
72if(FALSE) rcorrs <- function(x, y, weights=rep(1,length(y)),
73                             method=c('exact','bin'), nbin=1000,
74                             na.rm=TRUE)
75{
76  ## Experimental function - probably don't need
77
78  method <- match.arg(method)
79
80  if(na.rm) {
81    s <- !is.na(x + oldUnclass(y) + weights)
82    x <- x[s]; y <- y[s]; weights <- weights[s]
83  }
84
85  n <- length(x)
86  if(missing(method))
87    method <- if(n < 1000) 'exact'
88              else 'bin'
89
90  y <- as.category(y);
91  nly <- length(levels(y))
92  if(method=='bin') {
93    r <- range(x); d <- r[2] - r[1]
94    x <- 1 + trunc((nbin-1)*(x - r[1])/d)
95
96    xy <- y*nbin + x
97
98    ## Code below is lifted from rowsum()
99    storage.mode(weights) <- "double"
100    temp <-
101      if(.R.)
102        .C('R_rowsum', dd=as.integer(dd),
103           as.double(max(1,weights)*n),
104           x=weights, as.double(xy), PACKAGE='base')
105      else
106        .C("S_rowsum",
107           dd = as.integer(c(n,1)),
108           as.double(max(1,weights)*n),
109           x = weights,
110           as.double(xy))   ## 3Jun01
111
112    new.n <- temp$dd[1]
113    weights <- temp$x[1:new.n]
114
115    uxy <- unique(xy)
116    x <- uxy %% nbin
117    y <- (uxy - x)/nbin
118    n <- length(x)
119  }
120
121  list(x=x, y=y, weights=weights)
122
123  #storage.mode(x) <- "single"
124  #storage.mode(y) <- "single"
125  #storage.mode(event) <- "logical"
126
127  ## wcidxy doesn't exist yet
128  z <- .Fortran("wcidxy",as.single(x),as.single(y),as.integer(weights),as.integer(n),
129                nrel=double(1),nconc=double(1),nuncert=double(1),
130                c.index=double(1),gamma=double(1),sd=double(1),as.logical(outx))
131  r <- c(z$c.index,z$gamma,z$sd,n,z$nrel,z$nconc,z$nuncert)
132  names(r) <- c("C Index","Dxy","S.D.","n","missing","uncensored",
133                "Relevant Pairs",	"Concordant","Uncertain")
134  r
135}
136