1# File src/library/stats/R/plot.lm.R 2# Part of the R package, https://www.R-project.org 3# 4# Copyright (C) 1995-2019 The R Core Team 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# A copy of the GNU General Public License is available at 17# https://www.R-project.org/Licenses/ 18 19plot.lm <- 20function (x, which = c(1,2,3,5), ## was which = 1L:4L, 21 caption = list("Residuals vs Fitted", "Normal Q-Q", 22 "Scale-Location", "Cook's distance", 23 "Residuals vs Leverage", 24 expression("Cook's dist vs Leverage " * h[ii] / (1 - h[ii]))), 25 panel = if(add.smooth) function(x, y, ...) 26 panel.smooth(x, y, iter=iter.smooth, ...) else points, 27 sub.caption = NULL, main = "", 28 ask = prod(par("mfcol")) < length(which) && dev.interactive(), ..., 29 id.n = 3, labels.id = names(residuals(x)), cex.id = 0.75, 30 qqline = TRUE, cook.levels = c(0.5, 1.0), 31 add.smooth = getOption("add.smooth"), 32 iter.smooth = if(isGlm # && binomialLike 33 ) 0 else 3, 34 label.pos = c(4,2), cex.caption = 1, cex.oma.main = 1.25) 35{ 36 dropInf <- function(x, h) { 37 if(any(isInf <- h >= 1.0)) { 38 warning(gettextf("not plotting observations with leverage one:\n %s", 39 paste(which(isInf), collapse=", ")), 40 call. = FALSE, domain = NA) 41 x[isInf] <- NaN 42 } 43 x 44 } 45 46 if (!inherits(x, "lm")) 47 stop("use only with \"lm\" objects") 48 if(!is.numeric(which) || any(which < 1) || any(which > 6)) 49 stop("'which' must be in 1:6") 50 if((isGlm <- inherits(x, "glm"))) 51 binomialLike <- family(x)$family == "binomial" # || "multinomial" (maybe) 52 show <- rep(FALSE, 6) 53 show[which] <- TRUE 54 r <- if(isGlm) residuals(x, type="pearson") else residuals(x) 55 yh <- predict(x) # != fitted() for glm 56 w <- weights(x) 57 if(!is.null(w)) { # drop obs with zero wt: PR#6640 58 wind <- w != 0 59 r <- r[wind] 60 yh <- yh[wind] 61 w <- w[wind] 62 labels.id <- labels.id[wind] 63 } 64 n <- length(r) 65 if (any(show[2L:6L])) { 66 s <- if (inherits(x, "rlm")) x$s 67 else if(isGlm) sqrt(summary(x)$dispersion) 68 else sqrt(deviance(x)/df.residual(x)) 69 hii <- (infl <- influence(x, do.coef = FALSE))$hat 70 if (any(show[4L:6L])) { 71 cook <- cooks.distance(x, infl) 72 ## cook <- 73 ## if (isGlm) 74 ## cooks.distance (x, infl = infl) 75 ## else cooks.distance(x, infl = infl, sd = s, res = r, hat = hii) 76 } 77 } 78 if (any(show[c(2L,3L,5L)])) { 79 ## (Defensive programming used when fusing code for 2:3 and 5) 80 ylab5 <- ylab23 <- if(isGlm) "Std. Pearson resid." else "Standardized residuals" 81 r.w <- if (is.null(w)) r else sqrt(w) * r 82 ## NB: rs is already NaN if r=0, hii=1 83 rsp <- rs <- dropInf( if (isGlm) rstandard(x, type="pearson") else r.w/(s * sqrt(1 - hii)), hii ) 84 } 85 86 if (any(show[5L:6L])) { # using 'leverages' 87 r.hat <- range(hii, na.rm = TRUE) # though should never have NA 88 isConst.hat <- all(r.hat == 0) || 89 diff(r.hat) < 1e-10 * mean(hii, na.rm = TRUE) 90 } 91 if (any(show[c(1L, 3L)])) 92 l.fit <- if (isGlm) "Predicted values" else "Fitted values" 93 if (is.null(id.n)) 94 id.n <- 0 95 else { 96 id.n <- as.integer(id.n) 97 if(id.n < 0L || id.n > n) 98 stop(gettextf("'id.n' must be in {1,..,%d}", n), domain = NA) 99 } 100 if(id.n > 0L) { ## label the largest residuals 101 if(is.null(labels.id)) 102 labels.id <- paste(1L:n) 103 iid <- 1L:id.n 104 show.r <- sort.list(abs(r), decreasing = TRUE)[iid] 105 if(any(show[2L:3L])) 106 show.rs <- sort.list(abs(rs), decreasing = TRUE)[iid] 107 text.id <- function(x, y, ind, adj.x = TRUE) { 108 labpos <- 109 if(adj.x) label.pos[1+as.numeric(x > mean(range(x)))] else 3 110 text(x, y, labels.id[ind], cex = cex.id, xpd = TRUE, 111 pos = labpos, offset = 0.25) 112 } 113 } 114 getCaption <- function(k) # allow caption = "" , plotmath etc 115 if(length(caption) < k) NA_character_ else as.graphicsAnnot(caption[[k]]) 116 117 if(is.null(sub.caption)) { ## construct a default: 118 cal <- x$call 119 if (!is.na(m.f <- match("formula", names(cal)))) { 120 cal <- cal[c(1, m.f)] 121 names(cal)[2L] <- "" # drop " formula = " 122 } 123 cc <- deparse(cal, 80) # (80, 75) are ``parameters'' 124 nc <- nchar(cc[1L], "c") 125 abbr <- length(cc) > 1 || nc > 75 126 sub.caption <- 127 if(abbr) paste(substr(cc[1L], 1L, min(75L, nc)), "...") else cc[1L] 128 } 129 one.fig <- prod(par("mfcol")) == 1 130 if (ask) { 131 oask <- devAskNewPage(TRUE) 132 on.exit(devAskNewPage(oask)) 133 } 134 ##---------- Do the individual plots : ---------- 135 if (show[1L]) { 136 ylim <- range(r, na.rm=TRUE) 137 if(id.n > 0) 138 ylim <- extendrange(r = ylim, f = 0.08) 139 dev.hold() 140 plot(yh, r, xlab = l.fit, ylab = "Residuals", main = main, 141 ylim = ylim, type = "n", ...) 142 panel(yh, r, ...) 143 if (one.fig) 144 title(sub = sub.caption, ...) 145 mtext(getCaption(1), 3, 0.25, cex = cex.caption) 146 if(id.n > 0) { 147 y.id <- r[show.r] 148 y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3 149 text.id(yh[show.r], y.id, show.r) 150 } 151 abline(h = 0, lty = 3, col = "gray") 152 dev.flush() 153 } 154 if (show[2L]) { ## Normal 155 ylim <- range(rs, na.rm=TRUE) 156 ylim[2L] <- ylim[2L] + diff(ylim) * 0.075 157 dev.hold() 158 qq <- qqnorm(rs, main = main, ylab = ylab23, ylim = ylim, ...) 159 if (qqline) qqline(rs, lty = 3, col = "gray50") 160 if (one.fig) 161 title(sub = sub.caption, ...) 162 mtext(getCaption(2), 3, 0.25, cex = cex.caption) 163 if(id.n > 0) 164 text.id(qq$x[show.rs], qq$y[show.rs], show.rs) 165 dev.flush() 166 } 167 if (show[3L]) { 168 sqrtabsr <- sqrt(abs(rs)) 169 ylim <- c(0, max(sqrtabsr, na.rm=TRUE)) 170 yl <- as.expression(substitute(sqrt(abs(YL)), list(YL=as.name(ylab23)))) 171 yhn0 <- if(is.null(w)) yh else yh[w!=0] 172 dev.hold() 173 plot(yhn0, sqrtabsr, xlab = l.fit, ylab = yl, main = main, 174 ylim = ylim, type = "n", ...) 175 panel(yhn0, sqrtabsr, ...) 176 if (one.fig) 177 title(sub = sub.caption, ...) 178 mtext(getCaption(3), 3, 0.25, cex = cex.caption) 179 if(id.n > 0) 180 text.id(yhn0[show.rs], sqrtabsr[show.rs], show.rs) 181 dev.flush() 182 } 183 if (show[4L]) { ## Cook's Distances 184 if(id.n > 0) { 185 show.r <- order(-cook)[iid]# index of largest 'id.n' ones 186 ymx <- cook[show.r[1L]] * 1.075 187 } else ymx <- max(cook, na.rm = TRUE) 188 dev.hold() 189 plot(cook, type = "h", ylim = c(0, ymx), main = main, 190 xlab = "Obs. number", ylab = "Cook's distance", ...) 191 if (one.fig) 192 title(sub = sub.caption, ...) 193 mtext(getCaption(4), 3, 0.25, cex = cex.caption) 194 if(id.n > 0) 195 text.id(show.r, cook[show.r], show.r, adj.x=FALSE) 196 dev.flush() 197 } 198 if (show[5L]) { 199 ### Now handled earlier, consistently with 2:3, except variable naming 200 ## ylab5 <- if (isGlm) "Std. Pearson resid." else "Standardized residuals" 201 ## r.w <- residuals(x, "pearson") 202 ## if(!is.null(w)) r.w <- r.w[wind] # drop 0-weight cases 203 ## rsp <- dropInf( r.w/(s * sqrt(1 - hii)), hii ) 204 ylim <- range(rsp, na.rm = TRUE) 205 if (id.n > 0) { 206 ylim <- extendrange(r = ylim, f = 0.08) 207 show.rsp <- order(-cook)[iid] 208 } 209 do.plot <- TRUE 210 if(isConst.hat) { ## leverages are all the same 211 if(missing(caption)) # set different default 212 caption[[5L]] <- "Constant Leverage:\n Residuals vs Factor Levels" 213 ## plot against factor-level combinations instead 214 aterms <- attributes(terms(x)) 215 ## classes w/o response 216 dcl <- aterms$dataClasses[ -aterms$response ] 217 facvars <- names(dcl)[dcl %in% c("factor", "ordered")] 218 mf <- model.frame(x)[facvars]# better than x$model 219 if(ncol(mf) > 0) { 220 dm <- data.matrix(mf) 221 ## #{levels} for each of the factors: 222 nf <- length(nlev <- unlist(unname(lapply(x$xlevels, length)))) 223 ff <- if(nf == 1) 1 else rev(cumprod(c(1, nlev[nf:2]))) 224 facval <- (dm-1) %*% ff 225 xx <- facval # for use in do.plot section. 226 dev.hold() 227 plot(facval, rsp, xlim = c(-1/2, sum((nlev-1) * ff) + 1/2), 228 ylim = ylim, xaxt = "n", 229 main = main, xlab = "Factor Level Combinations", 230 ylab = ylab5, type = "n", ...) 231 axis(1, at = ff[1L]*(1L:nlev[1L] - 1/2) - 1/2, 232 labels = x$xlevels[[1L]]) 233 mtext(paste(facvars[1L],":"), side = 1, line = 0.25, adj=-.05) 234 abline(v = ff[1L]*(0:nlev[1L]) - 1/2, col="gray", lty="F4") 235 panel(facval, rsp, ...) 236 abline(h = 0, lty = 3, col = "gray") 237 dev.flush() 238 } 239 else { # no factors 240 message(gettextf("hat values (leverages) are all = %s\n and there are no factor predictors; no plot no. 5", 241 format(mean(r.hat))), 242 domain = NA) 243 frame() 244 do.plot <- FALSE 245 } 246 } 247 else { ## Residual vs Leverage 248 xx <- hii 249 ## omit hatvalues of 1. 250 xx[xx >= 1] <- NA 251 252 dev.hold() 253 plot(xx, rsp, xlim = c(0, max(xx, na.rm = TRUE)), ylim = ylim, 254 main = main, xlab = "Leverage", ylab = ylab5, type = "n", 255 ...) 256 panel(xx, rsp, ...) 257 abline(h = 0, v = 0, lty = 3, col = "gray") 258 if (one.fig) 259 title(sub = sub.caption, ...) 260 if(length(cook.levels)) { 261 p <- x$rank # not length(coef(x)) 262 usr <- par("usr") 263 hh <- seq.int(min(r.hat[1L], r.hat[2L]/100), usr[2L], 264 length.out = 101) 265 for(crit in cook.levels) { 266 cl.h <- sqrt(crit*p*(1-hh)/hh) 267 lines(hh, cl.h, lty = 2, col = 2) 268 lines(hh,-cl.h, lty = 2, col = 2) 269 } 270 legend("bottomleft", legend = "Cook's distance", 271 lty = 2, col = 2, bty = "n") 272 xmax <- min(0.99, usr[2L]) 273 ymult <- sqrt(p*(1-xmax)/xmax) 274 aty <- sqrt(cook.levels)*ymult 275 axis(4, at = c(-rev(aty), aty), 276 labels = paste(c(rev(cook.levels), cook.levels)), 277 mgp = c(.25,.25,0), las = 2, tck = 0, 278 cex.axis = cex.id, col.axis = 2) 279 } 280 dev.flush() 281 } # if(const h_ii) .. else .. 282 if (do.plot) { 283 mtext(getCaption(5), 3, 0.25, cex = cex.caption) 284 if (id.n > 0) { 285 y.id <- rsp[show.rsp] 286 y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3 287 text.id(xx[show.rsp], y.id, show.rsp) 288 } 289 } 290 } 291 if (show[6L]) { 292 g <- dropInf( hii/(1-hii), hii ) 293 ymx <- max(cook, na.rm = TRUE)*1.025 294 dev.hold() 295 plot(g, cook, xlim = c(0, max(g, na.rm=TRUE)), ylim = c(0, ymx), 296 main = main, ylab = "Cook's distance", 297 xlab = expression("Leverage " * h[ii]), 298 xaxt = "n", type = "n", ...) 299 panel(g, cook, ...) 300 ## Label axis with h_ii values 301 athat <- pretty(hii) 302 axis(1, at = athat/(1-athat), labels = paste(athat)) 303 if (one.fig) 304 title(sub = sub.caption, ...) 305 ## Draw pretty "contour" lines through origin and label them 306 p <- x$rank 307 bval <- pretty(sqrt(p*cook/g), 5) 308 usr <- par("usr") 309 xmax <- usr[2L] 310 ymax <- usr[4L] 311 for(i in seq_along(bval)) { 312 bi2 <- bval[i]^2 313 if(p*ymax > bi2*xmax) { 314 xi <- xmax + strwidth(" ")/3 315 yi <- bi2*xi/p 316 abline(0, bi2, lty = 2) 317 text(xi, yi, paste(bval[i]), adj = 0, xpd = TRUE) 318 } else { 319 yi <- ymax - 1.5*strheight(" ") 320 xi <- p*yi/bi2 321 lines(c(0, xi), c(0, yi), lty = 2) 322 text(xi, ymax-0.8*strheight(" "), paste(bval[i]), 323 adj = 0.5, xpd = TRUE) 324 } 325 } 326 327 ## axis(4, at=p*cook.levels, labels=paste(c(rev(cook.levels), cook.levels)), 328 ## mgp=c(.25,.25,0), las=2, tck=0, cex.axis=cex.id) 329 mtext(getCaption(6), 3, 0.25, cex = cex.caption) 330 if (id.n > 0) { 331 show.r <- order(-cook)[iid] 332 text.id(g[show.r], cook[show.r], show.r) 333 } 334 dev.flush() 335 } 336 337 if (!one.fig && par("oma")[3L] >= 1) 338 mtext(sub.caption, outer = TRUE, cex = cex.oma.main) 339 invisible() 340} 341