1 2#' Missingness Map 3#' 4#' Plots a missingness map showing where missingness occurs in 5#' the dataset passed to \code{amelia}. 6#' 7#' @param obj an object of class "amelia"; typically output from the 8#' function \code{amelia}, a matrix or a dataframe. 9#' @param vars a vector of column numbers or column names of the data 10#' to include in the plot. The default is to plot all variables. 11#' @param legend should a legend be drawn? (True or False) 12#' @param col a vector of length two where the first element specifies 13#' the color for missing cells and the second element specifies 14#" the color for observed cells. 15#' @param main main title of the plot. Defaults to "Missingness Map". 16#' @param y.cex expansion for the variables names on the x-axis. 17#' @param x.cex expansion for the unit names on the y-axis. 18#' @param y.labels a vector of row labels to print on the y-axis 19#' @param y.at a vector of the same length as \code{y.labels} with row 20#' nmumbers associated with the labels. 21#' @param csvar column number or name of the variable corresponding to 22#' the unit indicator. Only used when the \code{obj} is not of class 23#' \code{amelia}. 24#' @param tsvar column number or name of the variable corresponding to 25#' the time indicator. Only used when the \code{obj} is not of class 26#' \code{amelia}. 27#' @param rank.order a logical value. If \code{TRUE}, the default, then 28#' the order of the variables along the the x-axis is sorted by the 29#' percent missing (from highest to lowest). If \code{FALSE}, it is 30#' simply the order of the variables in the data. 31#' @param margins a vector of length two that specifies the bottom and 32#' left margins of the plot. Useful for when variable names or 33#' row names are long. 34#' @param gap.xaxis value to pass to the \code{gap.axis} argument of 35#' the \code{axis} function that plots the x-axis. See 36#' \code{\link{axis}} for more details. Ignored on R versions less 37#' than 4.0.0. 38#' @param x.las value of the \code{las} argument to pass to the 39#' \code{\link{axis}} function creating the x-axis. 40#' @param ... further graphical arguments. 41#' 42#' @details \code{missmap} draws a map of the missingness in a dataset using the 43#' \code{image} function. The columns are reordered to put the most 44#' missing variable farthest to the left. The rows are reordered to a 45#' unit-period order if the \code{ts} and \code{cs} arguments were passed 46#' to \code{amelia}. If not, the rows are not reordered. 47#' 48#' The \code{y.labels} and \code{y.at} commands can be used to associate 49#' labels with rows in the data to identify them in the plot. The y-axis 50#' is internally inverted so that the first row of the data is associated 51#' with the top-most row of the missingness map. The values of 52#' \code{y.at} should refer to the rows of the data, not to any point on 53#' the plotting region. 54#' 55#' @seealso \code{\link{compare.density}}, \code{\link{overimpute}}, 56#' \code{\link{tscsPlot}}, \code{\link{image}}, \code{\link{heatmap}} 57missmap <- function(obj, vars, legend = TRUE, col, main, 58 y.cex = 0.8, x.cex = 0.8, y.labels, y.at, csvar = NULL, 59 tsvar = NULL, rank.order = TRUE, margins = c(5, 5), 60 gap.xaxis = 1, x.las = 2, ...) { 61 62 63 if (inherits(obj, "amelia")) { 64 vnames <- colnames(obj$imputations[[1]]) 65 n <- nrow(obj$missMatrix) 66 p <- ncol(obj$missMatrix) 67 percent.missing <- colMeans(obj$missMatrix) 68 pmiss.all <- mean(obj$missMatrix) 69 r1 <- obj$missMatrix 70 } else { 71 vnames <- colnames(obj) 72 n <- nrow(obj) 73 p <- ncol(obj) 74 percent.missing <- colMeans(is.na(obj)) 75 pmiss.all <- mean(is.na(obj)) 76 r1 <- 1 * is.na(obj) 77 } 78 79 if (missing(col)) col <- c("#eff3ff", "#2171b5") 80 if (!missing(vars)) { 81 if (is.character(vars)) { 82 vars <- match(vars, vnames) 83 if (any(is.na(vars))) { 84 stop("vars not found in the data") 85 } 86 } 87 if (any(!(vars %in% 1:p))) { 88 stop("vars outside range of the data") 89 } 90 p <- length(vars) 91 r1 <- r1[, vars] 92 percent.missing <- percent.missing[vars] 93 pmiss.all <- mean(r1) 94 } 95 96 if (!missing(y.labels) && 97 (missing(y.at) && (length(y.labels) != n))) { 98 stop("y.at must accompany y.labels if there is less than onefor each row") 99 } 100 101 if (is.null(csvar)) csvar <- obj$arguments$cs 102 if (is.null(tsvar)) tsvar <- obj$arguments$ts 103 104 if (missing(y.labels)) { 105 if (!is.null(csvar)) { 106 if (class(obj) == "amelia") { 107 cs <- obj$imputations[[1]][, csvar] 108 } else { 109 cs <- obj[, csvar] 110 } 111 y.labels <- cs 112 if (is.factor(y.labels)) y.labels <- levels(y.labels)[unclass(y.labels)] 113 114 cs.names <- y.labels 115 116 117 if (!is.numeric(cs)) cs <- as.numeric(as.factor(cs)) 118 if (!is.null(tsvar)) { 119 if (class(obj) == "amelia") { 120 ts <- as.numeric(obj$imputations[[1]][, tsvar]) 121 } else { 122 ts <- as.numeric(obj[, tsvar]) 123 } 124 unit.period <- order(cs, ts) 125 } else { 126 unit.period <- 1:n 127 } 128 129 y.labels <- y.labels[unit.period] 130 r1 <- r1[unit.period, ] 131 132 brks <- c(TRUE,rep(FALSE, times = (n-1))) 133 for (i in 2:n) { 134 brks[i] <- (cs[unit.period][i] != cs[unit.period][i - 1]) 135 } 136 y.at <- which(brks) 137 138 y.labels <- y.labels[brks] 139 } else { 140 y.labels <- row.names(obj$imputations[[1]]) 141 y.at <- seq(1, n, by = 15) 142 y.labels <- y.labels[y.at] 143 } 144 } else { 145 if (missing(y.at)) 146 y.at <- n:1 147 } 148 missrank <- rev(order(percent.missing)) 149 if (rank.order) { 150 chess <- t(!r1[n:1, missrank]) 151 vnames <- vnames[missrank] 152 } else { 153 chess <- t(!r1[n:1, ]) 154 } 155 y.at <- (n:1)[y.at] 156 157 if (missing(main)) 158 main <- "Missingness Map" 159 160 par(mar = c(margins, 2, 1) + 0.1) 161 ## here we fork for data/tscs type plots. users cant set this yet. 162 type <- "data" 163 if (legend) { 164 graphics::layout(matrix(c(1, 2), nrow = 1), widths = c(0.75, 0.25)) 165 par(mar = c(margins, 2, 0) + 0.1, mgp = c(3, 0.25, 0)) 166 } 167 if (type == "data") { 168 169 col.fix <- col 170 if (sum(!chess) == 0) { 171 col.fix <- col[2] 172 } 173 image(x = 1:(p), y = 1:n, z = chess, axes = FALSE, 174 col = col.fix, xlab = "", ylab = "", main = main) 175 if (getRversion() >= "4.0.0") { 176 axis(1, lwd = 0, labels = vnames, las = x.las, at = 1:p, cex.axis = x.cex, 177 gap.axis = gap.xaxis) 178 } else { 179 axis(1, lwd = 0, labels = vnames, las = x.las, at = 1:p, cex.axis = x.cex) 180 } 181 axis(2, lwd = 0, labels = y.labels, las = 1, at = y.at, cex.axis = y.cex) 182 183 184 if (legend) { 185 pm.lab <- paste("Missing (", round(100 * pmiss.all), "%)", sep = "") 186 po.lab <- paste("Observed (", 100 - round(100 * pmiss.all), "%)", 187 sep = "") 188 par(mar = c(0, 0, 0, 0.3)) 189 plot(0, 0, type = "n", axes = FALSE, ann = FALSE) 190 legend("left", col = col, bty = "n", xjust = 0, border = "grey", 191 legend = c(pm.lab, po.lab), fill = col, horiz = FALSE) 192 } 193 } else { 194 tscsdata <- data.frame(cs.names, ts, rowMeans(r1)) 195 tscsdata <- reshape(tscsdata, idvar = "cs.names", timevar = "ts", 196 direction = "wide") 197 rownames(tscsdata) <- tscsdata[, 1] 198 colnames(tscsdata) <- unique(ts) 199 tscsdata <- as.matrix(tscsdata[, -1]) 200 201 cols <- rev(heat.colors(5)) 202 203 image(z = t(tscsdata), axes = FALSE, col = cols, main = main, 204 ylab = "", xlab = "") 205 at.seq <- seq(from = 0, to = 1, length = ncol(tscsdata)) 206 axis(1, labels = unique(ts), at = at.seq, tck = 0, lwd = 0, las = 2) 207 axis(2, labels = rownames(tscsdata), at = at.seq, tck = 0, lwd = 0, 208 las = 1, cex.axis = .8) 209 210 if (legend) { 211 leg.names <- c("0-0.2", "0.2-0.4", "0.4-0.6", "0.6-0.8", "0.8-1") 212 legend(x = 0.95, y = 1.01, col = cols, bty = "n", xjust = 1, 213 legend = leg.names, fill = cols, horiz = TRUE) 214 } 215 } 216 217 invisible(NULL) 218} 219