1# pROC: Tools Receiver operating characteristic (ROC curves) with
2# (partial) area under the curve, confidence intervals and comparison.
3# Copyright (C) 2010-2014 Xavier Robin, Alexandre Hainard, Natacha Turck,
4# Natalia Tiberti, Frédérique Lisacek, Jean-Charles Sanchez,
5# Markus Müller
6#
7# This program is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20# Delong's test paired, used by roc.test.roc
21delong.paired.test <- function(calcs) {
22
23	# Input calcs is a list returned by delong.paired.calculations().
24
25	zscore <- with(calcs, d/sig)
26
27	if (is.nan(zscore) && calcs$d == 0 && calcs$sig == 0)
28		zscore <- 0 # special case: no difference between theta's produces a NaN
29
30  return(zscore)
31}
32
33# Delong's test unpaired, used by roc.test.roc
34delong.unpaired.test <- function(roc1, roc2) {
35
36  nR <- length(roc1$controls)
37  mR <- length(roc1$cases)
38
39  nS <- length(roc2$controls)
40  mS <- length(roc2$cases)
41
42  VR <- delongPlacements(roc1)
43  VS <- delongPlacements(roc2)
44
45  SRX <- sum((VR$X - VR$theta) * (VR$X - VR$theta))/(mR-1)
46  SSX <- sum((VS$X - VS$theta) * (VS$X - VS$theta))/(mS-1)
47
48  SRY <- sum((VR$Y - VR$theta) * (VR$Y - VR$theta))/(nR-1)
49  SSY <- sum((VS$Y - VS$theta) * (VS$Y - VS$theta))/(nS-1)
50
51  SR <- SRX/mR + SRY/nR
52  SS <- SSX/mS + SSY/nS
53
54  ntotR <- nR + mR
55  ntotS <- nS + mS
56  SSR <- sqrt((SR) + (SS))
57  t <- (VR$theta - VS$theta) / SSR
58  df <- ((SR) + (SS))^2 /
59    (((SR)^2 / (ntotR-1)) + ((SS)^2 / (ntotS -1 )))
60
61  return(c(t, df))
62}
63
64ci.auc.delong <- function(roc, conf.level) {
65  YR <- roc$controls # = C2, n, YRj
66  XR <- roc$cases # = C1, m, XRi
67
68  n <- length(YR)
69  m <- length(XR)
70
71  # If controls or cases have a single observation, we would produce NaNs in SX and SY
72  if (m <= 1 || n <= 1) {
73  	return(rep(NA, 3))
74  }
75
76  V <- delongPlacements(roc)
77
78  SX <- sum((V$X - V$theta) * (V$X - V$theta))/(m-1)
79  SY <- sum((V$Y - V$theta) * (V$Y - V$theta))/(n-1)
80  S <- SX/m + SY/n
81  ci <- qnorm(c(0+(1-conf.level)/2, .5, 1-(1-conf.level)/2), mean = V$theta, sd = sqrt(S))
82  # In some rare cases we have ci[3] > 1 or ci[1] < 0
83  ci[ci > 1] <- 1
84  ci[ci < 0] <- 0
85
86  # According to Pepe (p. 107), we should probably be doing something like
87  # log(roc$auc / (1 - roc$auc)) + pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))
88  # log(roc$auc / (1 - roc$auc)) - pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))
89  # for logit AUC, so that for AUC:
90  # exp(log(roc$auc / (1 - roc$auc)) + pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))) * (1 - roc$auc)
91  # exp(log(roc$auc / (1 - roc$auc)) - pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))) * (1 - roc$auc)
92  # However the bounds are very very much smaller (about 10 times) than bootstrap, which seems unrealistic
93  # Stay with normal conf interval for now.
94
95  return(ci)
96}
97
98# function to calculate the CI
99ci.delong.paired <- function(calcs, conf.level) {
100
101	# Input calcs is a list generated by delong.paired.calculations().
102	# CI is calculated using the normally distributed pivot given in
103	# DeLong's 1988 paper.
104
105	crit_z <- qnorm(1 - ((1 - conf.level)/2))
106	out <- list()
107	out$upper <- with(calcs, d + crit_z * sig)
108	out$lower <- with(calcs, d - crit_z * sig)
109	out$level <- conf.level
110	return(out)
111}
112
113# Runs the placements and main calculations for the paired DeLong's test
114#  so that they can be easily used by both the test and CI functions.
115delong.paired.calculations <- function(roc1, roc2) {
116	n <- length(roc1$controls)
117	m <- length(roc1$cases)
118
119	VR <- delongPlacements(roc1)
120	VS <- delongPlacements(roc2)
121
122	SX <- matrix(NA, ncol=2, nrow=2)
123	SX[1,1] <- sum((VR$X - VR$theta) * (VR$X - VR$theta))/(m-1)
124	SX[1,2] <- sum((VR$X - VR$theta) * (VS$X - VS$theta))/(m-1)
125	SX[2,1] <- sum((VS$X - VS$theta) * (VR$X - VR$theta))/(m-1)
126	SX[2,2] <- sum((VS$X - VS$theta) * (VS$X - VS$theta))/(m-1)
127
128	SY <- matrix(NA, ncol=2, nrow=2)
129	SY[1,1] <- sum((VR$Y - VR$theta) * (VR$Y - VR$theta))/(n-1)
130	SY[1,2] <- sum((VR$Y - VR$theta) * (VS$Y - VS$theta))/(n-1)
131	SY[2,1] <- sum((VS$Y - VS$theta) * (VR$Y - VR$theta))/(n-1)
132	SY[2,2] <- sum((VS$Y - VS$theta) * (VS$Y - VS$theta))/(n-1)
133
134	S <- SX/m + SY/n
135	L <- c(1,-1)
136	sig <- sqrt(L%*%S%*%L)
137	d <- VR$theta - VS$theta
138	return(list("d" = d, "sig" = sig[[1]]))
139}
140
141# Calls delongPlacementsCpp safely
142# Ensures that the theta value calculated is correct
143delongPlacements <- function(roc) {
144	placements <- delongPlacementsCpp(roc)
145
146	# Ensure theta equals auc
147	auc <- roc$auc / ifelse(roc$percent, 100, 1)
148	if (! isTRUE(all.equal(placements$theta, auc))) {
149		sessionInfo <- sessionInfo()
150		save(roc, placements, sessionInfo, file="pROC_bug.RData")
151		stop(sprintf("pROC: error in calculating DeLong's theta: got %.20f instead of %.20f. Diagnostic data saved in pROC_bug.RData. Please report this bug to <%s>.", placements$theta, auc, utils:: packageDescription("pROC")$BugReports))
152	}
153
154	return(placements)
155}
156