1#' A grouped data frame. 2#' 3#' @description 4#' The easiest way to create a grouped data frame is to call the `group_by()` 5#' method on a data frame or tbl: this will take care of capturing 6#' the unevaluated expressions for you. 7#' 8#' These functions are designed for programmatic use. For data analysis 9#' purposes see [group_data()] for the accessor functions that retrieve 10#' various metadata from a grouped data frames. 11#' 12#' @keywords internal 13#' @param data a tbl or data frame. 14#' @param vars A character vector. 15#' @param drop When `.drop = TRUE`, empty groups are dropped. 16#' 17#' @export 18grouped_df <- function(data, vars, drop = group_by_drop_default(data)) { 19 if (!is.data.frame(data)) { 20 abort("`data` must be a data frame.") 21 } 22 if (!is.character(vars)) { 23 abort("`vars` must be a character vector.") 24 } 25 26 if (length(vars) == 0) { 27 as_tibble(data) 28 } else { 29 groups <- compute_groups(data, vars, drop = drop) 30 new_grouped_df(data, groups) 31 } 32} 33 34compute_groups <- function(data, vars, drop = FALSE) { 35 unknown <- setdiff(vars, names(data)) 36 if (length(unknown) > 0) { 37 vars <- paste0(encodeString(vars, quote = "`"), collapse = ", ") 38 abort(glue("`vars` missing from `data`: {vars}.")) 39 } 40 41 # Only train the dictionary based on selected columns 42 group_vars <- as_tibble(data)[vars] 43 split_key_loc <- vec_split_id_order(group_vars) 44 old_keys <- split_key_loc$key 45 old_rows <- split_key_loc$loc 46 47 signal("", class = "dplyr_regroup") 48 49 groups <- tibble(!!!old_keys, ".rows" := old_rows) 50 51 if (!isTRUE(drop) && any(map_lgl(old_keys, is.factor))) { 52 # Extra work is needed to auto expand empty groups 53 54 uniques <- map(old_keys, function(.) { 55 if (is.factor(.)) . else vec_unique(.) 56 }) 57 58 # Internally we only work with integers 59 # 60 # so for any grouping column that is not a factor 61 # we need to match the values to the unique values 62 positions <- map2(old_keys, uniques, function(.x, .y) { 63 if (is.factor(.x)) .x else vec_match(.x, .y) 64 }) 65 66 # Expand groups internally adds empty groups recursively 67 # we get back: 68 # - indices: a list of how to vec_slice the current keys 69 # to get the new keys 70 # 71 # - rows: the new list of rows (i.e. the same as old rows, 72 # but with some extra empty integer(0) added for empty groups) 73 expanded <- expand_groups(groups, positions, vec_size(old_keys)) 74 new_indices <- expanded$indices 75 new_rows <- expanded$rows 76 77 # Make the new keys from the old keys and the new_indices 78 new_keys <- pmap(list(old_keys, new_indices, uniques), function(key, index, unique) { 79 if (is.factor(key)) { 80 if (is.ordered(key)) { 81 new_ordered(index, levels = levels(key)) 82 } else { 83 new_factor(index, levels = levels(key)) 84 } 85 } else { 86 vec_slice(unique, index) 87 } 88 }) 89 names(new_keys) <- vars 90 91 groups <- tibble(!!!new_keys, ".rows" := new_rows) 92 } 93 94 attr(groups, ".drop") <- drop 95 groups 96} 97 98count_regroups <- function(code) { 99 i <- 0 100 withCallingHandlers(code, dplyr_regroup = function(cnd) { 101 i <<- i + 1 102 }) 103 i 104} 105 106show_regroups <- function(code) { 107 withCallingHandlers(code, dplyr_regroup = function(cnd) { 108 cat("Regrouping...\n") 109 }) 110} 111 112#' Low-level construction and validation for the grouped_df class 113#' 114#' `new_grouped_df()` is a constructor designed to be high-performance so only 115#' check types, not values. This means it is the caller's responsibility 116#' to create valid values, and hence this is for expert use only. 117#' 118#' @param x A data frame 119#' @param groups The grouped structure, `groups` should be a data frame. 120#' Its last column should be called `.rows` and be 121#' a list of 1 based integer vectors that all are between 1 and the number of rows of `.data`. 122#' @param class additional class, will be prepended to canonical classes of a grouped data frame. 123#' @param check_bounds whether to check all indices for out of bounds problems in grouped_df objects 124#' @param ... additional attributes 125#' 126#' @examples 127#' # 5 bootstrap samples 128#' tbl <- new_grouped_df( 129#' tibble(x = rnorm(10)), 130#' groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) 131#' ) 132#' # mean of each bootstrap sample 133#' summarise(tbl, x = mean(x)) 134#' 135#' @importFrom tibble new_tibble 136#' @keywords internal 137#' @export 138new_grouped_df <- function(x, groups, ..., class = character()) { 139 if (!is.data.frame(x)) { 140 abort(c( 141 "`new_grouped_df()` incompatible argument.", 142 x = "`x` is not a data frame.") 143 ) 144 } 145 if (!is.data.frame(groups) || tail(names(groups), 1L) != ".rows") { 146 abort(c( 147 "`new_grouped_df()` incompatible argument.", 148 i = "`groups` should be a data frame, and its last column be called `.rows`." 149 )) 150 } 151 152 new_tibble( 153 x, 154 groups = groups, 155 ..., 156 nrow = NROW(x), 157 class = c(class, "grouped_df") 158 ) 159} 160 161#' @description 162#' `validate_grouped_df()` validates the attributes of a `grouped_df`. 163#' 164#' @rdname new_grouped_df 165#' @export 166validate_grouped_df <- function(x, check_bounds = FALSE) { 167 if (is.null(attr(x, "groups")) && !is.null(attr(x, "vars"))) { 168 abort(c( 169 "Corrupt `grouped_df` using old (< 0.8.0) format.", 170 i = "Strip off old grouping with `ungroup()`." 171 )) 172 } 173 174 result <- .Call(`dplyr_validate_grouped_df`, x, check_bounds) 175 if (!is.null(result)) { 176 abort(result) 177 } 178 x 179} 180 181setOldClass(c("grouped_df", "tbl_df", "tbl", "data.frame")) 182 183#' @rdname grouped_df 184#' @export 185is.grouped_df <- function(x) inherits(x, "grouped_df") 186#' @rdname grouped_df 187#' @export 188is_grouped_df <- is.grouped_df 189 190group_sum <- function(x) { 191 grps <- n_groups(x) 192 paste0(commas(group_vars(x)), " [", big_mark(grps), "]") 193} 194 195#' @export 196tbl_sum.grouped_df <- function(x) { 197 c( 198 NextMethod(), 199 c("Groups" = group_sum(x)) 200 ) 201} 202 203#' @export 204as.data.frame.grouped_df <- function(x, row.names = NULL, 205 optional = FALSE, ...) { 206 new_data_frame(dplyr_vec_data(x), n = nrow(x)) 207} 208 209#' @export 210as_tibble.grouped_df <- function(x, ...) { 211 new_tibble(dplyr_vec_data(x), nrow = nrow(x)) 212} 213 214#' @importFrom tibble is_tibble 215#' @export 216`[.grouped_df` <- function(x, i, j, drop = FALSE) { 217 out <- NextMethod() 218 219 if (!is.data.frame(out)) { 220 return(out) 221 } 222 223 if (drop) { 224 as_tibble(out) 225 } else { 226 groups <- group_intersect(x, out) 227 if ((missing(i) || nargs() == 2) && identical(groups, group_vars(x))) { 228 new_grouped_df(out, group_data(x)) 229 } else { 230 grouped_df(out, groups, group_by_drop_default(x)) 231 } 232 } 233} 234 235#' @export 236`$<-.grouped_df` <- function(x, name, ..., value) { 237 out <- NextMethod() 238 if (name %in% group_vars(x)) { 239 grouped_df(out, group_intersect(x, out), group_by_drop_default(x)) 240 } else { 241 out 242 } 243} 244 245#' @export 246`[<-.grouped_df` <- function(x, i, j, ..., value) { 247 out <- NextMethod() 248 grouped_df(out, group_intersect(x, out), group_by_drop_default(x)) 249} 250 251#' @export 252`[[<-.grouped_df` <- function(x, ..., value) { 253 out <- NextMethod() 254 grouped_df(out, group_intersect(x, out), group_by_drop_default(x)) 255} 256 257#' @export 258`names<-.grouped_df` <- function(x, value) { 259 data <- as.data.frame(x) 260 names(data) <- value 261 262 groups <- group_data(x) 263 group_loc <- match(intersect(names(groups), names(x)), names(x)) 264 group_names <- c(value[group_loc], ".rows") 265 if (!identical(group_names, names(groups))) { 266 names(groups) <- c(value[group_loc], ".rows") 267 } 268 269 new_grouped_df(data, groups) 270} 271 272#' @method rbind grouped_df 273#' @export 274rbind.grouped_df <- function(...) { 275 bind_rows(...) 276} 277 278#' @method cbind grouped_df 279#' @export 280cbind.grouped_df <- function(...) { 281 bind_cols(...) 282} 283 284group_data_trim <- function(group_data, preserve = FALSE) { 285 if (preserve) { 286 return(group_data) 287 } 288 289 non_empty <- lengths(group_data$".rows") > 0 290 group_data[non_empty, , drop = FALSE] 291} 292 293# Helpers ----------------------------------------------------------------- 294 295expand_groups <- function(old_groups, positions, nr) { 296 .Call(`dplyr_expand_groups`, old_groups, positions, nr) 297} 298 299vec_split_id_order <- function(x) { 300 split_id <- vec_group_loc(x) 301 split_id$loc <- new_list_of(split_id$loc, ptype = integer()) 302 303 vec_slice(split_id, vec_order(split_id$key)) 304} 305 306group_intersect <- function(x, new) { 307 intersect(group_vars(x), names(new)) 308} 309 310