1# pROC: Tools Receiver operating characteristic (ROC curves) with 2# (partial) area under the curve, confidence intervals and comparison. 3# Copyright (C) 2010-2014 Xavier Robin, Alexandre Hainard, Natacha Turck, 4# Natalia Tiberti, Frédérique Lisacek, Jean-Charles Sanchez, 5# Markus Müller 6# 7# This program is free software: you can redistribute it and/or modify 8# it under the terms of the GNU General Public License as published by 9# the Free Software Foundation, either version 3 of the License, or 10# (at your option) any later version. 11# 12# This program is distributed in the hope that it will be useful, 13# but WITHOUT ANY WARRANTY; without even the implied warranty of 14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15# GNU General Public License for more details. 16# 17# You should have received a copy of the GNU General Public License 18# along with this program. If not, see <http://www.gnu.org/licenses/>. 19 20# Delong's test paired, used by roc.test.roc 21delong.paired.test <- function(calcs) { 22 23 # Input calcs is a list returned by delong.paired.calculations(). 24 25 zscore <- with(calcs, d/sig) 26 27 if (is.nan(zscore) && calcs$d == 0 && calcs$sig == 0) 28 zscore <- 0 # special case: no difference between theta's produces a NaN 29 30 return(zscore) 31} 32 33# Delong's test unpaired, used by roc.test.roc 34delong.unpaired.test <- function(roc1, roc2) { 35 36 nR <- length(roc1$controls) 37 mR <- length(roc1$cases) 38 39 nS <- length(roc2$controls) 40 mS <- length(roc2$cases) 41 42 VR <- delongPlacements(roc1) 43 VS <- delongPlacements(roc2) 44 45 SRX <- sum((VR$X - VR$theta) * (VR$X - VR$theta))/(mR-1) 46 SSX <- sum((VS$X - VS$theta) * (VS$X - VS$theta))/(mS-1) 47 48 SRY <- sum((VR$Y - VR$theta) * (VR$Y - VR$theta))/(nR-1) 49 SSY <- sum((VS$Y - VS$theta) * (VS$Y - VS$theta))/(nS-1) 50 51 SR <- SRX/mR + SRY/nR 52 SS <- SSX/mS + SSY/nS 53 54 ntotR <- nR + mR 55 ntotS <- nS + mS 56 SSR <- sqrt((SR) + (SS)) 57 t <- (VR$theta - VS$theta) / SSR 58 df <- ((SR) + (SS))^2 / 59 (((SR)^2 / (ntotR-1)) + ((SS)^2 / (ntotS -1 ))) 60 61 return(c(t, df)) 62} 63 64ci.auc.delong <- function(roc, conf.level) { 65 YR <- roc$controls # = C2, n, YRj 66 XR <- roc$cases # = C1, m, XRi 67 68 n <- length(YR) 69 m <- length(XR) 70 71 # If controls or cases have a single observation, we would produce NaNs in SX and SY 72 if (m <= 1 || n <= 1) { 73 return(rep(NA, 3)) 74 } 75 76 V <- delongPlacements(roc) 77 78 SX <- sum((V$X - V$theta) * (V$X - V$theta))/(m-1) 79 SY <- sum((V$Y - V$theta) * (V$Y - V$theta))/(n-1) 80 S <- SX/m + SY/n 81 ci <- qnorm(c(0+(1-conf.level)/2, .5, 1-(1-conf.level)/2), mean = V$theta, sd = sqrt(S)) 82 # In some rare cases we have ci[3] > 1 or ci[1] < 0 83 ci[ci > 1] <- 1 84 ci[ci < 0] <- 0 85 86 # According to Pepe (p. 107), we should probably be doing something like 87 # log(roc$auc / (1 - roc$auc)) + pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc))) 88 # log(roc$auc / (1 - roc$auc)) - pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc))) 89 # for logit AUC, so that for AUC: 90 # exp(log(roc$auc / (1 - roc$auc)) + pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))) * (1 - roc$auc) 91 # exp(log(roc$auc / (1 - roc$auc)) - pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))) * (1 - roc$auc) 92 # However the bounds are very very much smaller (about 10 times) than bootstrap, which seems unrealistic 93 # Stay with normal conf interval for now. 94 95 return(ci) 96} 97 98# function to calculate the CI 99ci.delong.paired <- function(calcs, conf.level) { 100 101 # Input calcs is a list generated by delong.paired.calculations(). 102 # CI is calculated using the normally distributed pivot given in 103 # DeLong's 1988 paper. 104 105 crit_z <- qnorm(1 - ((1 - conf.level)/2)) 106 out <- list() 107 out$upper <- with(calcs, d + crit_z * sig) 108 out$lower <- with(calcs, d - crit_z * sig) 109 out$level <- conf.level 110 return(out) 111} 112 113# Runs the placements and main calculations for the paired DeLong's test 114# so that they can be easily used by both the test and CI functions. 115delong.paired.calculations <- function(roc1, roc2) { 116 n <- length(roc1$controls) 117 m <- length(roc1$cases) 118 119 VR <- delongPlacements(roc1) 120 VS <- delongPlacements(roc2) 121 122 SX <- matrix(NA, ncol=2, nrow=2) 123 SX[1,1] <- sum((VR$X - VR$theta) * (VR$X - VR$theta))/(m-1) 124 SX[1,2] <- sum((VR$X - VR$theta) * (VS$X - VS$theta))/(m-1) 125 SX[2,1] <- sum((VS$X - VS$theta) * (VR$X - VR$theta))/(m-1) 126 SX[2,2] <- sum((VS$X - VS$theta) * (VS$X - VS$theta))/(m-1) 127 128 SY <- matrix(NA, ncol=2, nrow=2) 129 SY[1,1] <- sum((VR$Y - VR$theta) * (VR$Y - VR$theta))/(n-1) 130 SY[1,2] <- sum((VR$Y - VR$theta) * (VS$Y - VS$theta))/(n-1) 131 SY[2,1] <- sum((VS$Y - VS$theta) * (VR$Y - VR$theta))/(n-1) 132 SY[2,2] <- sum((VS$Y - VS$theta) * (VS$Y - VS$theta))/(n-1) 133 134 S <- SX/m + SY/n 135 L <- c(1,-1) 136 sig <- sqrt(L%*%S%*%L) 137 d <- VR$theta - VS$theta 138 return(list("d" = d, "sig" = sig[[1]])) 139} 140 141# Calls delongPlacementsCpp safely 142# Ensures that the theta value calculated is correct 143delongPlacements <- function(roc) { 144 placements <- delongPlacementsCpp(roc) 145 146 # Ensure theta equals auc 147 auc <- roc$auc / ifelse(roc$percent, 100, 1) 148 if (! isTRUE(all.equal(placements$theta, auc))) { 149 sessionInfo <- sessionInfo() 150 save(roc, placements, sessionInfo, file="pROC_bug.RData") 151 stop(sprintf("pROC: error in calculating DeLong's theta: got %.20f instead of %.20f. Diagnostic data saved in pROC_bug.RData. Please report this bug to <%s>.", placements$theta, auc, utils:: packageDescription("pROC")$BugReports)) 152 } 153 154 return(placements) 155} 156