1#  File src/library/stats/R/plot.lm.R
2#  Part of the R package, https://www.R-project.org
3#
4#  Copyright (C) 1995-2019 The R Core Team
5#
6#  This program is free software; you can redistribute it and/or modify
7#  it under the terms of the GNU General Public License as published by
8#  the Free Software Foundation; either version 2 of the License, or
9#  (at your option) any later version.
10#
11#  This program is distributed in the hope that it will be useful,
12#  but WITHOUT ANY WARRANTY; without even the implied warranty of
13#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#  GNU General Public License for more details.
15#
16#  A copy of the GNU General Public License is available at
17#  https://www.R-project.org/Licenses/
18
19plot.lm <-
20function (x, which = c(1,2,3,5), ## was which = 1L:4L,
21	  caption = list("Residuals vs Fitted", "Normal Q-Q",
22	  "Scale-Location", "Cook's distance",
23	  "Residuals vs Leverage",
24	  expression("Cook's dist vs Leverage  " * h[ii] / (1 - h[ii]))),
25	  panel = if(add.smooth) function(x, y, ...)
26              panel.smooth(x, y, iter=iter.smooth, ...) else points,
27	  sub.caption = NULL, main = "",
28	  ask = prod(par("mfcol")) < length(which) && dev.interactive(), ...,
29	  id.n = 3, labels.id = names(residuals(x)), cex.id = 0.75,
30	  qqline = TRUE, cook.levels = c(0.5, 1.0),
31	  add.smooth = getOption("add.smooth"),
32          iter.smooth = if(isGlm # && binomialLike
33                           ) 0 else 3,
34	  label.pos = c(4,2), cex.caption = 1, cex.oma.main = 1.25)
35{
36    dropInf <- function(x, h) {
37	if(any(isInf <- h >= 1.0)) {
38            warning(gettextf("not plotting observations with leverage one:\n  %s",
39                             paste(which(isInf), collapse=", ")),
40                    call. = FALSE, domain = NA)
41	    x[isInf] <- NaN
42	}
43	x
44    }
45
46    if (!inherits(x, "lm"))
47	stop("use only with \"lm\" objects")
48    if(!is.numeric(which) || any(which < 1) || any(which > 6))
49	stop("'which' must be in 1:6")
50    if((isGlm <- inherits(x, "glm")))
51        binomialLike <- family(x)$family == "binomial" # || "multinomial" (maybe)
52    show <- rep(FALSE, 6)
53    show[which] <- TRUE
54    r <- if(isGlm) residuals(x, type="pearson") else residuals(x)
55    yh <- predict(x) # != fitted() for glm
56    w <- weights(x)
57    if(!is.null(w)) { # drop obs with zero wt: PR#6640
58	wind <- w != 0
59	r <- r[wind]
60	yh <- yh[wind]
61	w <- w[wind]
62	labels.id <- labels.id[wind]
63    }
64    n <- length(r)
65    if (any(show[2L:6L])) {
66	s <- if (inherits(x, "rlm")) x$s
67	     else if(isGlm) sqrt(summary(x)$dispersion)
68	     else sqrt(deviance(x)/df.residual(x))
69	hii <- (infl <- influence(x, do.coef = FALSE))$hat
70	if (any(show[4L:6L])) {
71            cook <- cooks.distance(x, infl)
72	    ## cook <-
73	    ##     if (isGlm)
74	    ##         cooks.distance (x, infl = infl)
75	    ##     else cooks.distance(x, infl = infl, sd = s, res = r, hat = hii)
76	}
77    }
78    if (any(show[c(2L,3L,5L)])) {
79        ## (Defensive programming used when fusing code for 2:3 and 5)
80	ylab5 <- ylab23 <- if(isGlm) "Std. Pearson resid." else "Standardized residuals"
81	r.w <- if (is.null(w)) r else sqrt(w) * r
82        ## NB: rs is already NaN if r=0, hii=1
83	rsp <- rs <- dropInf( if (isGlm) rstandard(x, type="pearson") else r.w/(s * sqrt(1 - hii)), hii )
84    }
85
86    if (any(show[5L:6L])) { # using 'leverages'
87        r.hat <- range(hii, na.rm = TRUE) # though should never have NA
88        isConst.hat <- all(r.hat == 0) ||
89            diff(r.hat) < 1e-10 * mean(hii, na.rm = TRUE)
90    }
91    if (any(show[c(1L, 3L)]))
92	l.fit <- if (isGlm) "Predicted values" else "Fitted values"
93    if (is.null(id.n))
94	id.n <- 0
95    else {
96	id.n <- as.integer(id.n)
97	if(id.n < 0L || id.n > n)
98	    stop(gettextf("'id.n' must be in {1,..,%d}", n), domain = NA)
99    }
100    if(id.n > 0L) { ## label the largest residuals
101	if(is.null(labels.id))
102	    labels.id <- paste(1L:n)
103	iid <- 1L:id.n
104	show.r <- sort.list(abs(r), decreasing = TRUE)[iid]
105	if(any(show[2L:3L]))
106	    show.rs <- sort.list(abs(rs), decreasing = TRUE)[iid]
107	text.id <- function(x, y, ind, adj.x = TRUE) {
108	    labpos <-
109                if(adj.x) label.pos[1+as.numeric(x > mean(range(x)))] else 3
110	    text(x, y, labels.id[ind], cex = cex.id, xpd = TRUE,
111		 pos = labpos, offset = 0.25)
112	}
113    }
114    getCaption <- function(k) # allow caption = "" , plotmath etc
115        if(length(caption) < k) NA_character_ else as.graphicsAnnot(caption[[k]])
116
117    if(is.null(sub.caption)) { ## construct a default:
118	cal <- x$call
119	if (!is.na(m.f <- match("formula", names(cal)))) {
120	    cal <- cal[c(1, m.f)]
121	    names(cal)[2L] <- "" # drop	" formula = "
122	}
123	cc <- deparse(cal, 80) # (80, 75) are ``parameters''
124	nc <- nchar(cc[1L], "c")
125	abbr <- length(cc) > 1 || nc > 75
126	sub.caption <-
127	    if(abbr) paste(substr(cc[1L], 1L, min(75L, nc)), "...") else cc[1L]
128    }
129    one.fig <- prod(par("mfcol")) == 1
130    if (ask) {
131	oask <- devAskNewPage(TRUE)
132	on.exit(devAskNewPage(oask))
133    }
134    ##---------- Do the individual plots : ----------
135    if (show[1L]) {
136	ylim <- range(r, na.rm=TRUE)
137	if(id.n > 0)
138	    ylim <- extendrange(r = ylim, f = 0.08)
139        dev.hold()
140	plot(yh, r, xlab = l.fit, ylab = "Residuals", main = main,
141	     ylim = ylim, type = "n", ...)
142	panel(yh, r, ...)
143	if (one.fig)
144	    title(sub = sub.caption, ...)
145	mtext(getCaption(1), 3, 0.25, cex = cex.caption)
146	if(id.n > 0) {
147	    y.id <- r[show.r]
148	    y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3
149	    text.id(yh[show.r], y.id, show.r)
150	}
151	abline(h = 0, lty = 3, col = "gray")
152        dev.flush()
153    }
154    if (show[2L]) { ## Normal
155	ylim <- range(rs, na.rm=TRUE)
156	ylim[2L] <- ylim[2L] + diff(ylim) * 0.075
157        dev.hold()
158	qq <- qqnorm(rs, main = main, ylab = ylab23, ylim = ylim, ...)
159	if (qqline) qqline(rs, lty = 3, col = "gray50")
160	if (one.fig)
161	    title(sub = sub.caption, ...)
162	mtext(getCaption(2), 3, 0.25, cex = cex.caption)
163	if(id.n > 0)
164	    text.id(qq$x[show.rs], qq$y[show.rs], show.rs)
165        dev.flush()
166    }
167    if (show[3L]) {
168	sqrtabsr <- sqrt(abs(rs))
169	ylim <- c(0, max(sqrtabsr, na.rm=TRUE))
170	yl <- as.expression(substitute(sqrt(abs(YL)), list(YL=as.name(ylab23))))
171	yhn0 <- if(is.null(w)) yh else yh[w!=0]
172        dev.hold()
173	plot(yhn0, sqrtabsr, xlab = l.fit, ylab = yl, main = main,
174	     ylim = ylim, type = "n", ...)
175	panel(yhn0, sqrtabsr, ...)
176	if (one.fig)
177	    title(sub = sub.caption, ...)
178	mtext(getCaption(3), 3, 0.25, cex = cex.caption)
179	if(id.n > 0)
180	    text.id(yhn0[show.rs], sqrtabsr[show.rs], show.rs)
181        dev.flush()
182    }
183    if (show[4L]) { ## Cook's Distances
184	if(id.n > 0) {
185	    show.r <- order(-cook)[iid]# index of largest 'id.n' ones
186	    ymx <- cook[show.r[1L]] * 1.075
187	} else ymx <- max(cook, na.rm = TRUE)
188        dev.hold()
189	plot(cook, type = "h", ylim = c(0, ymx), main = main,
190	     xlab = "Obs. number", ylab = "Cook's distance", ...)
191	if (one.fig)
192	    title(sub = sub.caption, ...)
193	mtext(getCaption(4), 3, 0.25, cex = cex.caption)
194	if(id.n > 0)
195	    text.id(show.r, cook[show.r], show.r, adj.x=FALSE)
196        dev.flush()
197    }
198    if (show[5L]) {
199        ### Now handled earlier, consistently with 2:3, except variable naming
200        ## ylab5 <- if (isGlm) "Std. Pearson resid." else "Standardized residuals"
201        ## r.w <- residuals(x, "pearson")
202        ## if(!is.null(w)) r.w <- r.w[wind] # drop 0-weight cases
203 	## rsp <- dropInf( r.w/(s * sqrt(1 - hii)), hii )
204	ylim <- range(rsp, na.rm = TRUE)
205	if (id.n > 0) {
206	    ylim <- extendrange(r = ylim, f = 0.08)
207	    show.rsp <- order(-cook)[iid]
208	}
209        do.plot <- TRUE
210        if(isConst.hat) { ## leverages are all the same
211	    if(missing(caption)) # set different default
212		caption[[5L]] <- "Constant Leverage:\n Residuals vs Factor Levels"
213            ## plot against factor-level combinations instead
214            aterms <- attributes(terms(x))
215            ## classes w/o response
216            dcl <- aterms$dataClasses[ -aterms$response ]
217            facvars <- names(dcl)[dcl %in% c("factor", "ordered")]
218            mf <- model.frame(x)[facvars]# better than x$model
219            if(ncol(mf) > 0) {
220                dm <- data.matrix(mf)
221                ## #{levels} for each of the factors:
222                nf <- length(nlev <- unlist(unname(lapply(x$xlevels, length))))
223                ff <- if(nf == 1) 1 else rev(cumprod(c(1, nlev[nf:2])))
224                facval <- (dm-1) %*% ff
225                xx <- facval # for use in do.plot section.
226                dev.hold()
227                plot(facval, rsp, xlim = c(-1/2, sum((nlev-1) * ff) + 1/2),
228                     ylim = ylim, xaxt = "n",
229                     main = main, xlab = "Factor Level Combinations",
230                     ylab = ylab5, type = "n", ...)
231                axis(1, at = ff[1L]*(1L:nlev[1L] - 1/2) - 1/2,
232                     labels = x$xlevels[[1L]])
233                mtext(paste(facvars[1L],":"), side = 1, line = 0.25, adj=-.05)
234                abline(v = ff[1L]*(0:nlev[1L]) - 1/2, col="gray", lty="F4")
235                panel(facval, rsp, ...)
236                abline(h = 0, lty = 3, col = "gray")
237                dev.flush()
238            }
239	    else { # no factors
240                message(gettextf("hat values (leverages) are all = %s\n and there are no factor predictors; no plot no. 5",
241                                 format(mean(r.hat))),
242                        domain = NA)
243                frame()
244                do.plot <- FALSE
245            }
246        }
247        else { ## Residual vs Leverage
248            xx <- hii
249            ## omit hatvalues of 1.
250            xx[xx >= 1] <- NA
251
252            dev.hold()
253            plot(xx, rsp, xlim = c(0, max(xx, na.rm = TRUE)), ylim = ylim,
254                 main = main, xlab = "Leverage", ylab = ylab5, type = "n",
255                 ...)
256            panel(xx, rsp, ...)
257            abline(h = 0, v = 0, lty = 3, col = "gray")
258            if (one.fig)
259                title(sub = sub.caption, ...)
260            if(length(cook.levels)) {
261                p <- x$rank # not length(coef(x))
262                usr <- par("usr")
263                hh <- seq.int(min(r.hat[1L], r.hat[2L]/100), usr[2L],
264                              length.out = 101)
265                for(crit in cook.levels) {
266                    cl.h <- sqrt(crit*p*(1-hh)/hh)
267                    lines(hh, cl.h, lty = 2, col = 2)
268                    lines(hh,-cl.h, lty = 2, col = 2)
269                }
270                legend("bottomleft", legend = "Cook's distance",
271                       lty = 2, col = 2, bty = "n")
272                xmax <- min(0.99, usr[2L])
273                ymult <- sqrt(p*(1-xmax)/xmax)
274                aty <- sqrt(cook.levels)*ymult
275                axis(4, at = c(-rev(aty), aty),
276                     labels = paste(c(rev(cook.levels), cook.levels)),
277                     mgp = c(.25,.25,0), las = 2, tck = 0,
278                     cex.axis = cex.id, col.axis = 2)
279            }
280            dev.flush()
281        } # if(const h_ii) .. else ..
282	if (do.plot) {
283	    mtext(getCaption(5), 3, 0.25, cex = cex.caption)
284	    if (id.n > 0) {
285		y.id <- rsp[show.rsp]
286		y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3
287		text.id(xx[show.rsp], y.id, show.rsp)
288	    }
289	}
290    }
291    if (show[6L]) {
292	g <- dropInf( hii/(1-hii), hii )
293	ymx <- max(cook, na.rm = TRUE)*1.025
294        dev.hold()
295	plot(g, cook, xlim = c(0, max(g, na.rm=TRUE)), ylim = c(0, ymx),
296	     main = main, ylab = "Cook's distance",
297             xlab = expression("Leverage  " * h[ii]),
298	     xaxt = "n", type = "n", ...)
299	panel(g, cook, ...)
300        ## Label axis with h_ii values
301	athat <- pretty(hii)
302	axis(1, at = athat/(1-athat), labels = paste(athat))
303	if (one.fig)
304	    title(sub = sub.caption, ...)
305        ## Draw pretty "contour" lines through origin and label them
306	p <- x$rank
307	bval <- pretty(sqrt(p*cook/g), 5)
308	usr <- par("usr")
309	xmax <- usr[2L]
310	ymax <- usr[4L]
311	for(i in seq_along(bval)) {
312	    bi2 <- bval[i]^2
313	    if(p*ymax > bi2*xmax) {
314		xi <- xmax + strwidth(" ")/3
315		yi <- bi2*xi/p
316		abline(0, bi2, lty = 2)
317		text(xi, yi, paste(bval[i]), adj = 0, xpd = TRUE)
318	    } else {
319		yi <- ymax - 1.5*strheight(" ")
320		xi <- p*yi/bi2
321		lines(c(0, xi), c(0, yi), lty = 2)
322		text(xi, ymax-0.8*strheight(" "), paste(bval[i]),
323		     adj = 0.5, xpd = TRUE)
324	    }
325	}
326
327	## axis(4, at=p*cook.levels, labels=paste(c(rev(cook.levels), cook.levels)),
328	##	mgp=c(.25,.25,0), las=2, tck=0, cex.axis=cex.id)
329	mtext(getCaption(6), 3, 0.25, cex = cex.caption)
330	if (id.n > 0) {
331	    show.r <- order(-cook)[iid]
332            text.id(g[show.r], cook[show.r], show.r)
333        }
334        dev.flush()
335    }
336
337    if (!one.fig && par("oma")[3L] >= 1)
338	mtext(sub.caption, outer = TRUE, cex = cex.oma.main)
339    invisible()
340}
341