1#' Convert an object into a molten data frame.
2#'
3#' This the generic melt function. See the following functions
4#' for the details about different data structures:
5#'
6#' \itemize{
7#'   \item \code{\link{melt.data.frame}} for data.frames
8#'   \item \code{\link{melt.array}} for arrays, matrices and tables
9#'   \item \code{\link{melt.list}} for lists
10#' }
11#'
12#' @keywords manip
13#' @param data Data set to melt
14#' @param na.rm Should NA values be removed from the data set? This will
15#'   convert explicit missings to implicit missings.
16#' @param ... further arguments passed to or from other methods.
17#' @param value.name name of variable used to store values
18#' @seealso \code{\link{cast}}
19#' @export
20melt <- function(data, ..., na.rm = FALSE, value.name = "value") {
21  UseMethod("melt", data)
22}
23
24#' Melt a vector.
25#' For vectors, makes a column of a data frame
26#'
27#' @param data vector to melt
28#' @param na.rm Should NA values be removed from the data set? This will
29#'   convert explicit missings to implicit missings.
30#' @param ... further arguments passed to or from other methods.
31#' @param value.name name of variable used to store values
32#' @keywords manip
33#' @seealso \code{\link{melt}}, \code{\link{cast}}
34#' @family melt methods
35#' @export
36melt.default <- function(data, ..., na.rm = FALSE, value.name = "value") {
37  if (na.rm) data <- data[!is.na(data)]
38  setNames(data.frame(data), value.name)
39}
40
41#' Melt a list by recursively melting each component.
42#'
43#' @keywords manip
44#' @param data list to recursively melt
45#' @param ... further arguments passed to or from other methods.
46#' @param level list level - used for creating labels
47#' @seealso \code{\link{cast}}
48#' @family melt methods
49#' @export
50#' @examples
51#' a <- as.list(c(1:4, NA))
52#' melt(a)
53#' names(a) <- letters[1:4]
54#' melt(a)
55#' a <- list(matrix(1:4, ncol=2), matrix(1:6, ncol=2))
56#' melt(a)
57#' a <- list(matrix(1:4, ncol=2), array(1:27, c(3,3,3)))
58#' melt(a)
59#' melt(list(1:5, matrix(1:4, ncol=2)))
60#' melt(list(list(1:3), 1, list(as.list(3:4), as.list(1:2))))
61melt.list <- function(data, ..., level = 1) {
62  parts <- lapply(data, melt, level = level + 1, ...)
63  result <- rbind.fill(parts)
64
65  # Add labels
66  names <- names(data) %||% seq_along(data)
67  lengths <- vapply(parts, nrow, integer(1))
68  labels <- rep(names, lengths)
69
70  label_var <- attr(data, "varname") %||% paste("L", level, sep = "")
71  result[[label_var]] <- labels
72
73  # result <- cbind(labels, result)
74  # result[, c(setdiff(names(result), "value"), "value")]
75
76  result
77}
78
79#' Melt a data frame into form suitable for easy casting.
80#'
81#' You need to tell melt which of your variables are id variables, and which
82#' are measured variables. If you only supply one of \code{id.vars} and
83#' \code{measure.vars}, melt will assume the remainder of the variables in the
84#' data set belong to the other. If you supply neither, melt will assume
85#' factor and character variables are id variables, and all others are
86#' measured.
87#'
88#' @param data data frame to melt
89#' @param id.vars vector of id variables. Can be integer (variable position)
90#'   or string (variable name). If blank, will use all non-measured variables.
91#' @param measure.vars vector of measured variables. Can be integer (variable
92#'   position) or string (variable name)If blank, will use all non id.vars
93#    variables.
94#' @param variable.name name of variable used to store measured variable names
95#' @param value.name name of variable used to store values
96#' @param na.rm Should NA values be removed from the data set? This will
97#'   convert explicit missings to implicit missings.
98#' @param ... further arguments passed to or from other methods.
99#' @param factorsAsStrings Control whether factors are converted to character
100#'   when melted as measure variables. When \code{FALSE}, coercion is forced if
101#'   levels are not identical across the \code{measure.vars}.
102#' @family melt methods
103#' @keywords manip
104#' @seealso \code{\link{cast}}
105#' @export
106#' @examples
107#' names(airquality) <- tolower(names(airquality))
108#' melt(airquality, id=c("month", "day"))
109#' names(ChickWeight) <- tolower(names(ChickWeight))
110#' melt(ChickWeight, id=2:4)
111melt.data.frame <- function(data, id.vars, measure.vars, variable.name = "variable", ..., na.rm = FALSE, value.name = "value", factorsAsStrings = TRUE) {
112
113  ## Get the names of id.vars, measure.vars
114  vars <- melt_check(data, id.vars, measure.vars, variable.name, value.name)
115
116  ## Match them to indices in the data
117  id.ind <- match(vars$id, names(data))
118  measure.ind <- match(vars$measure, names(data))
119
120  ## Return early if we have id.ind but no measure.ind
121  if (!length(measure.ind)) {
122    return(data[id.vars])
123  }
124
125  ## Get the attributes if common, NULL if not.
126  args <- normalize_melt_arguments(data, measure.ind, factorsAsStrings)
127  measure.attributes <- args$measure.attributes
128  factorsAsStrings <- args$factorsAsStrings
129  valueAsFactor <- "factor" %in% measure.attributes$class
130
131  df <- melt_dataframe(
132    data,
133    as.integer(id.ind-1),
134    as.integer(measure.ind-1),
135    as.character(variable.name),
136    as.character(value.name),
137    as.pairlist(measure.attributes),
138    as.logical(factorsAsStrings),
139    as.logical(valueAsFactor)
140  )
141
142  if (na.rm) {
143    return(df[ !is.na(df[[value.name]]), ])
144  } else {
145    return(df)
146  }
147}
148
149#' Melt an array.
150#'
151#' This code is conceptually similar to \code{\link{as.data.frame.table}}
152#'
153#' @param data array to melt
154#' @param varnames variable names to use in molten data.frame
155#' @param ... further arguments passed to or from other methods.
156#' @param as.is if \code{FALSE}, the default, dimnames will be converted
157#'   using \code{\link{type.convert}}. If \code{TRUE}, they will be left
158#'   as strings.
159#' @param value.name name of variable used to store values
160#' @param na.rm Should NA values be removed from the data set? This will
161#'   convert explicit missings to implicit missings.
162#' @keywords manip
163#' @export
164#' @seealso \code{\link{cast}}
165#' @family melt methods
166#' @examples
167#' a <- array(c(1:23, NA), c(2,3,4))
168#' melt(a)
169#' melt(a, na.rm = TRUE)
170#' melt(a, varnames=c("X","Y","Z"))
171#' dimnames(a) <- lapply(dim(a), function(x) LETTERS[1:x])
172#' melt(a)
173#' melt(a, varnames=c("X","Y","Z"))
174#' dimnames(a)[1] <- list(NULL)
175#' melt(a)
176melt.array <- function(data, varnames = names(dimnames(data)), ...,
177                       na.rm = FALSE, as.is = FALSE, value.name = "value") {
178  var.convert <- function(x) {
179    if (!is.character(x)) return(x)
180
181    x <- type.convert(x, as.is = TRUE)
182    if (!is.character(x)) return(x)
183
184    factor(x, levels = unique(x))
185  }
186
187  dn <- amv_dimnames(data)
188  names(dn) <- varnames
189  if (!as.is) {
190    dn <- lapply(dn, var.convert)
191  }
192
193  labels <- expand.grid(dn, KEEP.OUT.ATTRS = FALSE,
194    stringsAsFactors = FALSE)
195
196  if (na.rm) {
197    missing <- is.na(data)
198    data <- data[!missing]
199    labels <- labels[!missing, ]
200  }
201
202  value_df <- setNames(data.frame(as.vector(data)), value.name)
203  cbind(labels, value_df)
204}
205
206#' @rdname melt.array
207#' @export
208melt.table <- melt.array
209
210#' @rdname melt.array
211#' @export
212melt.matrix <- melt.array
213
214#' Check that input variables to melt are appropriate.
215#'
216#' If id.vars or measure.vars are missing, \code{melt_check} will do its
217#' best to impute them. If you only supply one of id.vars and measure.vars,
218#' melt will assume the remainder of the variables in the data set belong to
219#' the other. If you supply neither, melt will assume discrete variables are
220#' id variables and all other are measured.
221#'
222#' @param data data frame
223#' @param id.vars vector of identifying variable names or indexes
224#' @param measure.vars vector of Measured variable names or indexes
225#' @param variable.name name of variable used to store measured variable names
226#' @param value.name name of variable used to store values
227#' @return a list giving id and measure variables names.
228melt_check <- function(data, id.vars, measure.vars, variable.name, value.name) {
229  varnames <- names(data)
230
231  # Convert positions to names
232  if (!missing(id.vars) && is.numeric(id.vars)) {
233    id.vars <- varnames[id.vars]
234  }
235  if (!missing(measure.vars) && is.numeric(measure.vars)) {
236    measure.vars <- varnames[measure.vars]
237  }
238
239  # Check that variables exist
240  if (!missing(id.vars)) {
241    unknown <- setdiff(id.vars, varnames)
242    if (length(unknown) > 0) {
243      vars <- paste(unknown, collapse=", ")
244      stop("id variables not found in data: ", vars, call. = FALSE)
245    }
246  }
247
248  if (!missing(measure.vars)) {
249    unknown <- setdiff(measure.vars, varnames)
250    if (length(unknown) > 0) {
251      vars <- paste(unknown, collapse=", ")
252      stop("measure variables not found in data: ", vars, call. = FALSE)
253    }
254  }
255
256  # Fill in missing pieces
257  if (missing(id.vars) && missing(measure.vars)) {
258    discrete <- sapply(data, is.discrete)
259    id.vars <- varnames[discrete]
260    measure.vars <- varnames[!discrete]
261    if (length(id.vars) != 0) {
262      message("Using ", paste(id.vars, collapse = ", "), " as id variables")
263    } else {
264      message("No id variables; using all as measure variables")
265    }
266  } else if (missing(id.vars)) {
267    id.vars <- setdiff(varnames, measure.vars)
268  } else if (missing(measure.vars)) {
269    measure.vars <- setdiff(varnames, id.vars)
270  }
271
272  # Ensure variable names are characters of length one
273  if (!is.string(variable.name))
274    stop("'variable.name' should be a string", call. = FALSE)
275  if (!is.string(value.name))
276    stop("'value.name' should be a string", call. = FALSE)
277
278  list(id = id.vars, measure = measure.vars)
279}
280