1# fancy scatterplot matrices (J. Fox)
2
3# 2010-09-04: J. Fox: changed color choice
4# 2010-09-16: fixed point color when col is length 1
5# 2011-03-08: J. Fox: changed col argument
6# 2012-04-18: J. Fox: fixed labels argument in scatterplotMatrix.formula()
7# 2012-09-12: J. Fox: smoother now given as function
8# 2012-09-19: J. Fox: restored smooth and span args for backwards compatibility
9# 2013-02-08: S. Weisberg: bug-fix for showLabels with groups
10# 2013-08-26: J. Fox: added use argument
11# 2014-08-07: J. Fox: plot univariate distributions by group (except for histogram)
12# 2014-08-17: J. Fox: report warning rather than error if not enough points in a group
13#                     to compute density
14# 2014-09-04: J. Fox: empty groups produce warning rather than error
15# 2017-02-14: J. Fox: consolidated smooth, id, legend, and ellipse arguments
16# 2017-02-17: S. Weisberg, more changes to arguments
17# 2017-02-19: J. Fox: bug fixes and improvement to col argument
18# 2017-04-18; S. Weisberg fixed bug in handling id=FALSE with matrix/data frame input.
19# 2017-04-18; S. Weisberg changed the default for by.groups to TRUE
20# 2017-04-20: S. Weisberg fixed bug with color handling
21# 2017-04-20: S. Weisberg the default diagonal is now adaptiveDensity using adaptiveKernel fn
22#                  diagonal argument is now a list similar to regLine and smooth
23#                  changed arguments and updated man page
24# 2017-05-08: S. Weisberg changed col=carPalette()
25# 2017-06-22: J. Fox: eliminated extraneous code for defunct labels argument; small cleanup
26# 2017-12-07: J. Fox: added fill, fill.alpha subargs to ellipse arg, suggestion of Michael Friendly.
27# 2018-02-09: S. Weisberg removed the transform and family arguments from the default method
28# 2018-04-02: J. Fox: warning rather than error for too few colors.
29# 2018-04-12: J. Fox: clean up handling of groups arg.
30# 2020-07-02: J. Fox: fix buglet in scatterplotMatrix.formula() when groups specified.
31
32scatterplotMatrix <- function(x, ...){
33  UseMethod("scatterplotMatrix")
34}
35
36scatterplotMatrix.formula <- function (formula, data=NULL, subset, ...) {
37  na.save <- options(na.action=na.omit)
38  on.exit(options(na.save))
39  na.pass <- function(dframe) dframe
40  m <- match.call(expand.dots = FALSE)
41  if (is.matrix(eval(m$data, sys.frame(sys.parent()))))
42    m$data <- as.data.frame(data)
43  m$id <- m$formula <- m$... <- NULL
44  m$na.action <- na.pass
45  m[[1]] <- as.name("model.frame")
46  if (!inherits(formula, "formula") | length(formula) != 2)
47    stop("invalid formula")
48  rhs <- formula[[2]]
49  if ("|" != deparse(rhs[[1]])){
50    groups <- FALSE
51  }
52  else{
53    groups <- TRUE
54    formula <- paste(as.character(formula), collapse=" ")
55    formula <- as.formula(sub("\\|", "+", formula))
56  }
57  m$formula <-formula
58  if (missing(data)){
59    X <- na.omit(eval(m, parent.frame()))
60 #   if (is.null(labels)) labels <- gsub("X", "", row.names(X))
61  }
62  else{
63    X <- eval(m, parent.frame())
64 #   if (is.null(labels)) labels <- rownames(X)
65  }
66  if (!groups) scatterplotMatrix(X, ...)
67  else{
68    ncol<-ncol(X)
69    scatterplotMatrix.default(X[, -ncol], groups=X[, ncol], ...)
70  }
71}
72
73
74scatterplotMatrix.default <-
75  function(x, smooth=TRUE, id=FALSE, legend=TRUE,
76           regLine=TRUE, ellipse=FALSE,
77           var.labels=colnames(x),
78           diagonal=TRUE,
79           plot.points=TRUE,
80           groups=NULL, by.groups=TRUE,
81           use=c("complete.obs", "pairwise.complete.obs"),
82           col=carPalette()[-1],
83           pch=1:n.groups,
84           cex=par("cex"), cex.axis=par("cex.axis"),
85           cex.labels=NULL, cex.main=par("cex.main"), row1attop=TRUE, ...){
86  transform <- FALSE
87#  family <- "bcPower"
88  force(col)
89#  n.groups <- if(by.groups) length(levels(groups)) else 1
90  if(isFALSE(diagonal)) diagonal <- "none" else {
91    diagonal.args <- applyDefaults(diagonal, defaults=list(method="adaptiveDensity"), type="diag")
92    diagonal <- if(!isFALSE(diagonal.args)) diagonal.args$method
93    diagonal.args$method <- NULL
94  }
95# regLine; use old arguments reg.line, lty and lwd
96  regLine.args <- applyDefaults(regLine, defaults=list(method=lm, lty=1, lwd=2,
97                                                       col=col), type="regLine")
98  if(!isFALSE(regLine.args)) {
99    reg.line <- regLine.args$method
100    lty <- regLine.args$lty
101    lwd <- regLine.args$lwd
102  } else reg.line <- "none"
103  # setup smoother, now including spread
104  n.groups <- if(is.null(groups)) 1
105    else {
106      if (!is.factor(groups)) groups <- as.factor(groups)
107      length(levels(groups))
108    }
109  smoother.args <- applyDefaults(smooth, defaults=list(smoother=loessLine,
110                              spread=(n.groups)==1, col=col, lty.smooth=2, lty.spread=4), type="smooth")
111  if (!isFALSE(smoother.args)) {
112    # check for an argument 'var' in smoother.args.
113    if(!is.null(smoother.args$var)) smoother.args$spread <- smoother.args$var
114    # end change
115    smoother <- smoother.args$smoother
116    spread <- if(is.null(smoother.args$spread)) TRUE else smoother.args$spread
117    smoother.args$spread <- smoother.args$smoother <- NULL
118    if(n.groups==1) smoother.args$col <- col[1]
119  }
120  else smoother <- "none"
121  # setup id
122  id <- applyDefaults(id, defaults=list(method="mahal", n=2, cex=1, col=col, location="lr"), type="id")
123  if (is.list(id) && "identify" %in% id$method) stop("interactive point identification not permitted")
124  if (isFALSE(id)){
125    id.n <- 0
126    id.method <- "mahal"
127    labels <- id.cex <- id.col <- id.location <- NULL
128  }
129  else{
130    labels <- if(!is.null(id$labels)) id$labels else row.names(x)
131    id.method <- id$method
132    id.n <- id$n
133    id.cex <- id$cex
134    id.col <- id$col
135    id.location <- id$location
136  }
137  if (is.null(labels)) labels <- as.character(seq(length.out=nrow(x)))
138  legend <- applyDefaults(legend, defaults=list(coords=NULL), type="legend")
139  if (!(isFALSE(legend) || missing(groups))){
140    legend.plot <- TRUE
141    legend.pos <- legend$coords
142  }
143  else {
144    legend.plot <- FALSE
145    legend.pos <- NULL
146  }
147  # ellipse
148  ellipse <- applyDefaults(ellipse, defaults=list(levels=c(0.5, 0.95), robust=TRUE, fill=TRUE, fill.alpha=0.2), type="ellipse")
149  if (isFALSE(ellipse)){
150    levels <- NULL
151    robust <- NULL
152  }
153  else{
154    levels <- ellipse$levels
155    robust <- ellipse$robust
156    fill <- ellipse$fill
157    fill.alpha <- ellipse$fill.alpha
158    ellipse <- TRUE
159  }
160  # pre 2017 code follows
161#  family <- match.arg(family)
162  use <- match.arg(use)
163  na.action <- if (use == "complete.obs") na.omit else na.pass
164  if (!(missing(groups))){
165    x <- na.action(data.frame(groups, labels, x, stringsAsFactors=FALSE))
166    #      groups <- as.factor(as.character(x[, 1]))
167    groups <- x$groups
168#    if (!is.factor(groups)) groups <- as.factor(as.character(x[,1]))
169    labels <- x[, 2]
170    x <- x[, -(1:2)]
171  }
172  else {
173    x <- na.action(data.frame(labels, x, stringsAsFactors=FALSE))
174    labels <- x[, 1]
175    x <- x[, -1]
176    id.col <- id.col[1]
177  }
178  legendPlot <- function(position="topright"){
179    usr <- par("usr")
180    legend(position, bg="white",
181           legend=levels(groups), pch=pch, col=col[1:n.groups],
182           cex=cex)
183  }
184  do.legend <- legend.plot
185####### diagonal panel functions
186  # The following panel function adapted from Richard Heiberger
187  panel.adaptiveDensity <- function(x, ...){
188    args <- applyDefaults(diagonal.args,
189        defaults=list(bw=bw.nrd0, adjust=1, kernel=dnorm, na.rm=TRUE))
190    if (n.groups > 1){
191      levs <- levels(groups)
192      for (i in 1:n.groups){
193        xx <- x[levs[i] == groups]
194        dens.x <- try(adaptiveKernel(xx, adjust = args$adjust, na.rm=args$na.rm,
195                              bw=args$bw, kernel=args$kernel), silent=TRUE)
196        if (!inherits(dens.x, "try-error")){
197          lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y *
198                  diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[i])
199        }
200        else warning("cannot estimate density for group ", levs[i], "\n",
201                     dens.x, "\n")
202        rug(xx, col=col[i])
203      }
204    }
205    else {
206      dens.x <- adaptiveKernel(x, adjust = args$adjust, na.rm=args$na.rm,
207                        bw=args$bw, kernel=args$kernel)
208      lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y * diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[1])
209      rug(x)
210    }
211    if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos)
212    do.legend <<- FALSE
213  }
214#
215  panel.density <- function(x, ...){
216    args <- applyDefaults(diagonal.args,
217                          defaults=list(bw="nrd0", adjust=1, kernel="gaussian", na.rm=TRUE))
218    if (n.groups > 1){
219      levs <- levels(groups)
220      for (i in 1:n.groups){
221        xx <- x[levs[i] == groups]
222        dens.x <- try(density(xx, adjust = args$adjust, na.rm=args$na.rm,
223                              bw=args$bw, kernel=args$kernel), silent=TRUE)
224        if (!inherits(dens.x, "try-error")){
225          lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y *
226                  diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[i])
227        }
228        else warning("cannot estimate density for group ", levs[i], "\n",
229                     dens.x, "\n")
230        rug(xx, col=col[i])
231      }
232    }
233    else {
234      dens.x <- density(x, adjust = args$adjust, na.rm=args$na.rm,
235                        bw=args$bw, kernel=args$kernel)
236      lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y * diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[1])
237      rug(x)
238    }
239    if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos)
240    do.legend <<- FALSE
241  }
242  panel.histogram <- function(x, ...){
243    par(new=TRUE)
244    args <- applyDefaults(diagonal.args, defaults=list(breaks="FD"))
245    h.col <- col[1]
246    if (h.col == "black") h.col <- "gray"
247    hist(x, main="", axes=FALSE, breaks=args$breaks, col=h.col)
248    if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos)
249    do.legend <<- FALSE
250  }
251  panel.boxplot <- function(x, ...){
252    b.col <- col[1:n.groups]
253    b.col[b.col == "black"] <- "gray"
254    par(new=TRUE)
255    if (n.groups == 1) boxplot(x, axes=FALSE, main="", col=col[1])
256    else boxplot(x ~ groups, axes=FALSE, main="", col=b.col)
257    if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos)
258    do.legend <<- FALSE
259  }
260  # The following panel function adapted from Richard Heiberger
261  panel.oned <- function(x, ...) {
262    range <- range(x, na.rm=TRUE)
263    delta <- diff(range)/50
264    y <- mean(range)
265    if (n.groups == 1) segments(x - delta, x, x + delta, x, col = col[1])
266    else {
267      segments(x - delta, x, x + delta, x, col = col[as.numeric(groups)])
268    }
269    if (do.legend) legendPlot(position=if (is.null(legend.pos)) "bottomright" else legend.pos)
270    do.legend <<- FALSE
271  }
272  panel.qqplot <- function(x, ...){
273    par(new=TRUE)
274    if (n.groups == 1) qqnorm(x, axes=FALSE, xlab="", ylab="", main="", col=col[1])
275    else qqnorm(x, axes=FALSE, xlab="", ylab="", main="", col=col[as.numeric(groups)])
276    qqline(x, col=col[1])
277    if (do.legend) legendPlot(position=if (is.null(legend.pos)) "bottomright" else legend.pos)
278    do.legend <<- FALSE
279  }
280  panel.blank <- function(x, ...){
281    if (do.legend) legendPlot(if (is.null(legend.pos)) "topright" else legend.pos)
282    do.legend <<- FALSE
283  }
284  which.fn <- match(diagonal,
285                    c("adaptiveDensity", "density", "boxplot", "histogram", "oned", "qqplot", "none"))
286  if(is.na(which.fn)) stop("incorrect name for the diagonal argument, see ?scatterplotMatrix")
287  diag <- list(panel.adaptiveDensity, panel.density, panel.boxplot, panel.histogram, panel.oned,
288               panel.qqplot, panel.blank)[[which.fn]]
289  groups <- as.factor(if(missing(groups)) rep(1, length(x[, 1])) else groups)
290  counts <- table(groups)
291  if (any(counts == 0)){
292    levels <- levels(groups)
293    warning("the following groups are empty: ", paste(levels[counts == 0], collapse=", "))
294    groups <- factor(groups, levels=levels[counts > 0])
295  }
296#  n.groups <- length(levels(groups))
297  if (n.groups > length(col)) {
298    warning("number of groups exceeds number of available colors\n  colors are recycled")
299    col <- rep(col, n.groups)
300  }
301  if (length(col) == 1) col <- rep(col, 3)
302  labs <- labels
303  pairs(x, labels=var.labels,
304        cex.axis=cex.axis, cex.main=cex.main, cex.labels=cex.labels, cex=cex,
305        diag.panel=diag, row1attop = row1attop,
306        panel=function(x, y, ...){
307          for (i in 1:n.groups){
308            subs <- groups == levels(groups)[i]
309            if (plot.points) points(x[subs], y[subs], pch=pch[i], col=col[if (n.groups == 1) 1 else i], cex=cex)
310            if (by.groups){
311              if (is.function(smoother)) smoother(x[subs], y[subs], col=smoother.args$col[i],
312                                                  log.x=FALSE, log.y=FALSE, spread=spread, smoother.args=smoother.args)
313              if (is.function(reg.line)) regLine(reg.line(y[subs] ~ x[subs]), lty=lty, lwd=lwd, col=regLine.args$col[i])
314              if (ellipse) dataEllipse(x[subs], y[subs], plot.points=FALSE,
315                                       levels=levels, col=col[i], robust=robust, lwd=1,
316                                       fill=fill, fill.alpha=fill.alpha)
317              showLabels(x[subs], y[subs], labs[subs], method=id.method,
318                         n=id.n, col=col[i], cex=id.cex, location=id.location,
319                         all=list(labels=labs, subs=subs))
320            }
321          }
322          if (!by.groups){
323            if (is.function(reg.line)) abline(reg.line(y ~ x), lty=lty, lwd=lwd, col=regLine.args$col[1])
324            if (is.function(smoother)) smoother(x, y, col=col[1],
325                                                log.x=FALSE, log.y=FALSE, spread=spread, smoother.args=smoother.args)
326            if (ellipse) dataEllipse(x, y, plot.points=FALSE, levels=levels, col=smoother.args$col,
327                                     robust=robust, lwd=1, fill=fill, fill.alpha=fill.alpha)
328            showLabels(x, y, labs, method=id.method,
329                       n=id.n, col=id.col, location=id.location, cex=id.cex)
330          }
331        }, ...
332  )
333}
334
335spm <- function(x, ...){
336  scatterplotMatrix(x, ...)
337}
338