1# fancy scatterplot matrices (J. Fox) 2 3# 2010-09-04: J. Fox: changed color choice 4# 2010-09-16: fixed point color when col is length 1 5# 2011-03-08: J. Fox: changed col argument 6# 2012-04-18: J. Fox: fixed labels argument in scatterplotMatrix.formula() 7# 2012-09-12: J. Fox: smoother now given as function 8# 2012-09-19: J. Fox: restored smooth and span args for backwards compatibility 9# 2013-02-08: S. Weisberg: bug-fix for showLabels with groups 10# 2013-08-26: J. Fox: added use argument 11# 2014-08-07: J. Fox: plot univariate distributions by group (except for histogram) 12# 2014-08-17: J. Fox: report warning rather than error if not enough points in a group 13# to compute density 14# 2014-09-04: J. Fox: empty groups produce warning rather than error 15# 2017-02-14: J. Fox: consolidated smooth, id, legend, and ellipse arguments 16# 2017-02-17: S. Weisberg, more changes to arguments 17# 2017-02-19: J. Fox: bug fixes and improvement to col argument 18# 2017-04-18; S. Weisberg fixed bug in handling id=FALSE with matrix/data frame input. 19# 2017-04-18; S. Weisberg changed the default for by.groups to TRUE 20# 2017-04-20: S. Weisberg fixed bug with color handling 21# 2017-04-20: S. Weisberg the default diagonal is now adaptiveDensity using adaptiveKernel fn 22# diagonal argument is now a list similar to regLine and smooth 23# changed arguments and updated man page 24# 2017-05-08: S. Weisberg changed col=carPalette() 25# 2017-06-22: J. Fox: eliminated extraneous code for defunct labels argument; small cleanup 26# 2017-12-07: J. Fox: added fill, fill.alpha subargs to ellipse arg, suggestion of Michael Friendly. 27# 2018-02-09: S. Weisberg removed the transform and family arguments from the default method 28# 2018-04-02: J. Fox: warning rather than error for too few colors. 29# 2018-04-12: J. Fox: clean up handling of groups arg. 30# 2020-07-02: J. Fox: fix buglet in scatterplotMatrix.formula() when groups specified. 31 32scatterplotMatrix <- function(x, ...){ 33 UseMethod("scatterplotMatrix") 34} 35 36scatterplotMatrix.formula <- function (formula, data=NULL, subset, ...) { 37 na.save <- options(na.action=na.omit) 38 on.exit(options(na.save)) 39 na.pass <- function(dframe) dframe 40 m <- match.call(expand.dots = FALSE) 41 if (is.matrix(eval(m$data, sys.frame(sys.parent())))) 42 m$data <- as.data.frame(data) 43 m$id <- m$formula <- m$... <- NULL 44 m$na.action <- na.pass 45 m[[1]] <- as.name("model.frame") 46 if (!inherits(formula, "formula") | length(formula) != 2) 47 stop("invalid formula") 48 rhs <- formula[[2]] 49 if ("|" != deparse(rhs[[1]])){ 50 groups <- FALSE 51 } 52 else{ 53 groups <- TRUE 54 formula <- paste(as.character(formula), collapse=" ") 55 formula <- as.formula(sub("\\|", "+", formula)) 56 } 57 m$formula <-formula 58 if (missing(data)){ 59 X <- na.omit(eval(m, parent.frame())) 60 # if (is.null(labels)) labels <- gsub("X", "", row.names(X)) 61 } 62 else{ 63 X <- eval(m, parent.frame()) 64 # if (is.null(labels)) labels <- rownames(X) 65 } 66 if (!groups) scatterplotMatrix(X, ...) 67 else{ 68 ncol<-ncol(X) 69 scatterplotMatrix.default(X[, -ncol], groups=X[, ncol], ...) 70 } 71} 72 73 74scatterplotMatrix.default <- 75 function(x, smooth=TRUE, id=FALSE, legend=TRUE, 76 regLine=TRUE, ellipse=FALSE, 77 var.labels=colnames(x), 78 diagonal=TRUE, 79 plot.points=TRUE, 80 groups=NULL, by.groups=TRUE, 81 use=c("complete.obs", "pairwise.complete.obs"), 82 col=carPalette()[-1], 83 pch=1:n.groups, 84 cex=par("cex"), cex.axis=par("cex.axis"), 85 cex.labels=NULL, cex.main=par("cex.main"), row1attop=TRUE, ...){ 86 transform <- FALSE 87# family <- "bcPower" 88 force(col) 89# n.groups <- if(by.groups) length(levels(groups)) else 1 90 if(isFALSE(diagonal)) diagonal <- "none" else { 91 diagonal.args <- applyDefaults(diagonal, defaults=list(method="adaptiveDensity"), type="diag") 92 diagonal <- if(!isFALSE(diagonal.args)) diagonal.args$method 93 diagonal.args$method <- NULL 94 } 95# regLine; use old arguments reg.line, lty and lwd 96 regLine.args <- applyDefaults(regLine, defaults=list(method=lm, lty=1, lwd=2, 97 col=col), type="regLine") 98 if(!isFALSE(regLine.args)) { 99 reg.line <- regLine.args$method 100 lty <- regLine.args$lty 101 lwd <- regLine.args$lwd 102 } else reg.line <- "none" 103 # setup smoother, now including spread 104 n.groups <- if(is.null(groups)) 1 105 else { 106 if (!is.factor(groups)) groups <- as.factor(groups) 107 length(levels(groups)) 108 } 109 smoother.args <- applyDefaults(smooth, defaults=list(smoother=loessLine, 110 spread=(n.groups)==1, col=col, lty.smooth=2, lty.spread=4), type="smooth") 111 if (!isFALSE(smoother.args)) { 112 # check for an argument 'var' in smoother.args. 113 if(!is.null(smoother.args$var)) smoother.args$spread <- smoother.args$var 114 # end change 115 smoother <- smoother.args$smoother 116 spread <- if(is.null(smoother.args$spread)) TRUE else smoother.args$spread 117 smoother.args$spread <- smoother.args$smoother <- NULL 118 if(n.groups==1) smoother.args$col <- col[1] 119 } 120 else smoother <- "none" 121 # setup id 122 id <- applyDefaults(id, defaults=list(method="mahal", n=2, cex=1, col=col, location="lr"), type="id") 123 if (is.list(id) && "identify" %in% id$method) stop("interactive point identification not permitted") 124 if (isFALSE(id)){ 125 id.n <- 0 126 id.method <- "mahal" 127 labels <- id.cex <- id.col <- id.location <- NULL 128 } 129 else{ 130 labels <- if(!is.null(id$labels)) id$labels else row.names(x) 131 id.method <- id$method 132 id.n <- id$n 133 id.cex <- id$cex 134 id.col <- id$col 135 id.location <- id$location 136 } 137 if (is.null(labels)) labels <- as.character(seq(length.out=nrow(x))) 138 legend <- applyDefaults(legend, defaults=list(coords=NULL), type="legend") 139 if (!(isFALSE(legend) || missing(groups))){ 140 legend.plot <- TRUE 141 legend.pos <- legend$coords 142 } 143 else { 144 legend.plot <- FALSE 145 legend.pos <- NULL 146 } 147 # ellipse 148 ellipse <- applyDefaults(ellipse, defaults=list(levels=c(0.5, 0.95), robust=TRUE, fill=TRUE, fill.alpha=0.2), type="ellipse") 149 if (isFALSE(ellipse)){ 150 levels <- NULL 151 robust <- NULL 152 } 153 else{ 154 levels <- ellipse$levels 155 robust <- ellipse$robust 156 fill <- ellipse$fill 157 fill.alpha <- ellipse$fill.alpha 158 ellipse <- TRUE 159 } 160 # pre 2017 code follows 161# family <- match.arg(family) 162 use <- match.arg(use) 163 na.action <- if (use == "complete.obs") na.omit else na.pass 164 if (!(missing(groups))){ 165 x <- na.action(data.frame(groups, labels, x, stringsAsFactors=FALSE)) 166 # groups <- as.factor(as.character(x[, 1])) 167 groups <- x$groups 168# if (!is.factor(groups)) groups <- as.factor(as.character(x[,1])) 169 labels <- x[, 2] 170 x <- x[, -(1:2)] 171 } 172 else { 173 x <- na.action(data.frame(labels, x, stringsAsFactors=FALSE)) 174 labels <- x[, 1] 175 x <- x[, -1] 176 id.col <- id.col[1] 177 } 178 legendPlot <- function(position="topright"){ 179 usr <- par("usr") 180 legend(position, bg="white", 181 legend=levels(groups), pch=pch, col=col[1:n.groups], 182 cex=cex) 183 } 184 do.legend <- legend.plot 185####### diagonal panel functions 186 # The following panel function adapted from Richard Heiberger 187 panel.adaptiveDensity <- function(x, ...){ 188 args <- applyDefaults(diagonal.args, 189 defaults=list(bw=bw.nrd0, adjust=1, kernel=dnorm, na.rm=TRUE)) 190 if (n.groups > 1){ 191 levs <- levels(groups) 192 for (i in 1:n.groups){ 193 xx <- x[levs[i] == groups] 194 dens.x <- try(adaptiveKernel(xx, adjust = args$adjust, na.rm=args$na.rm, 195 bw=args$bw, kernel=args$kernel), silent=TRUE) 196 if (!inherits(dens.x, "try-error")){ 197 lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y * 198 diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[i]) 199 } 200 else warning("cannot estimate density for group ", levs[i], "\n", 201 dens.x, "\n") 202 rug(xx, col=col[i]) 203 } 204 } 205 else { 206 dens.x <- adaptiveKernel(x, adjust = args$adjust, na.rm=args$na.rm, 207 bw=args$bw, kernel=args$kernel) 208 lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y * diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[1]) 209 rug(x) 210 } 211 if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos) 212 do.legend <<- FALSE 213 } 214# 215 panel.density <- function(x, ...){ 216 args <- applyDefaults(diagonal.args, 217 defaults=list(bw="nrd0", adjust=1, kernel="gaussian", na.rm=TRUE)) 218 if (n.groups > 1){ 219 levs <- levels(groups) 220 for (i in 1:n.groups){ 221 xx <- x[levs[i] == groups] 222 dens.x <- try(density(xx, adjust = args$adjust, na.rm=args$na.rm, 223 bw=args$bw, kernel=args$kernel), silent=TRUE) 224 if (!inherits(dens.x, "try-error")){ 225 lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y * 226 diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[i]) 227 } 228 else warning("cannot estimate density for group ", levs[i], "\n", 229 dens.x, "\n") 230 rug(xx, col=col[i]) 231 } 232 } 233 else { 234 dens.x <- density(x, adjust = args$adjust, na.rm=args$na.rm, 235 bw=args$bw, kernel=args$kernel) 236 lines(dens.x$x, min(x, na.rm=TRUE) + dens.x$y * diff(range(x, na.rm=TRUE))/diff(range(dens.x$y, na.rm=TRUE)), col=col[1]) 237 rug(x) 238 } 239 if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos) 240 do.legend <<- FALSE 241 } 242 panel.histogram <- function(x, ...){ 243 par(new=TRUE) 244 args <- applyDefaults(diagonal.args, defaults=list(breaks="FD")) 245 h.col <- col[1] 246 if (h.col == "black") h.col <- "gray" 247 hist(x, main="", axes=FALSE, breaks=args$breaks, col=h.col) 248 if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos) 249 do.legend <<- FALSE 250 } 251 panel.boxplot <- function(x, ...){ 252 b.col <- col[1:n.groups] 253 b.col[b.col == "black"] <- "gray" 254 par(new=TRUE) 255 if (n.groups == 1) boxplot(x, axes=FALSE, main="", col=col[1]) 256 else boxplot(x ~ groups, axes=FALSE, main="", col=b.col) 257 if (do.legend) legendPlot(position=if (is.null(legend.pos)) "topright" else legend.pos) 258 do.legend <<- FALSE 259 } 260 # The following panel function adapted from Richard Heiberger 261 panel.oned <- function(x, ...) { 262 range <- range(x, na.rm=TRUE) 263 delta <- diff(range)/50 264 y <- mean(range) 265 if (n.groups == 1) segments(x - delta, x, x + delta, x, col = col[1]) 266 else { 267 segments(x - delta, x, x + delta, x, col = col[as.numeric(groups)]) 268 } 269 if (do.legend) legendPlot(position=if (is.null(legend.pos)) "bottomright" else legend.pos) 270 do.legend <<- FALSE 271 } 272 panel.qqplot <- function(x, ...){ 273 par(new=TRUE) 274 if (n.groups == 1) qqnorm(x, axes=FALSE, xlab="", ylab="", main="", col=col[1]) 275 else qqnorm(x, axes=FALSE, xlab="", ylab="", main="", col=col[as.numeric(groups)]) 276 qqline(x, col=col[1]) 277 if (do.legend) legendPlot(position=if (is.null(legend.pos)) "bottomright" else legend.pos) 278 do.legend <<- FALSE 279 } 280 panel.blank <- function(x, ...){ 281 if (do.legend) legendPlot(if (is.null(legend.pos)) "topright" else legend.pos) 282 do.legend <<- FALSE 283 } 284 which.fn <- match(diagonal, 285 c("adaptiveDensity", "density", "boxplot", "histogram", "oned", "qqplot", "none")) 286 if(is.na(which.fn)) stop("incorrect name for the diagonal argument, see ?scatterplotMatrix") 287 diag <- list(panel.adaptiveDensity, panel.density, panel.boxplot, panel.histogram, panel.oned, 288 panel.qqplot, panel.blank)[[which.fn]] 289 groups <- as.factor(if(missing(groups)) rep(1, length(x[, 1])) else groups) 290 counts <- table(groups) 291 if (any(counts == 0)){ 292 levels <- levels(groups) 293 warning("the following groups are empty: ", paste(levels[counts == 0], collapse=", ")) 294 groups <- factor(groups, levels=levels[counts > 0]) 295 } 296# n.groups <- length(levels(groups)) 297 if (n.groups > length(col)) { 298 warning("number of groups exceeds number of available colors\n colors are recycled") 299 col <- rep(col, n.groups) 300 } 301 if (length(col) == 1) col <- rep(col, 3) 302 labs <- labels 303 pairs(x, labels=var.labels, 304 cex.axis=cex.axis, cex.main=cex.main, cex.labels=cex.labels, cex=cex, 305 diag.panel=diag, row1attop = row1attop, 306 panel=function(x, y, ...){ 307 for (i in 1:n.groups){ 308 subs <- groups == levels(groups)[i] 309 if (plot.points) points(x[subs], y[subs], pch=pch[i], col=col[if (n.groups == 1) 1 else i], cex=cex) 310 if (by.groups){ 311 if (is.function(smoother)) smoother(x[subs], y[subs], col=smoother.args$col[i], 312 log.x=FALSE, log.y=FALSE, spread=spread, smoother.args=smoother.args) 313 if (is.function(reg.line)) regLine(reg.line(y[subs] ~ x[subs]), lty=lty, lwd=lwd, col=regLine.args$col[i]) 314 if (ellipse) dataEllipse(x[subs], y[subs], plot.points=FALSE, 315 levels=levels, col=col[i], robust=robust, lwd=1, 316 fill=fill, fill.alpha=fill.alpha) 317 showLabels(x[subs], y[subs], labs[subs], method=id.method, 318 n=id.n, col=col[i], cex=id.cex, location=id.location, 319 all=list(labels=labs, subs=subs)) 320 } 321 } 322 if (!by.groups){ 323 if (is.function(reg.line)) abline(reg.line(y ~ x), lty=lty, lwd=lwd, col=regLine.args$col[1]) 324 if (is.function(smoother)) smoother(x, y, col=col[1], 325 log.x=FALSE, log.y=FALSE, spread=spread, smoother.args=smoother.args) 326 if (ellipse) dataEllipse(x, y, plot.points=FALSE, levels=levels, col=smoother.args$col, 327 robust=robust, lwd=1, fill=fill, fill.alpha=fill.alpha) 328 showLabels(x, y, labs, method=id.method, 329 n=id.n, col=id.col, location=id.location, cex=id.cex) 330 } 331 }, ... 332 ) 333} 334 335spm <- function(x, ...){ 336 scatterplotMatrix(x, ...) 337} 338