1#' A grouped data frame.
2#'
3#' @description
4#' The easiest way to create a grouped data frame is to call the `group_by()`
5#' method on a data frame or tbl: this will take care of capturing
6#' the unevaluated expressions for you.
7#'
8#' These functions are designed for programmatic use. For data analysis
9#' purposes see [group_data()] for the accessor functions that retrieve
10#' various metadata from a grouped data frames.
11#'
12#' @keywords internal
13#' @param data a tbl or data frame.
14#' @param vars A character vector.
15#' @param drop When `.drop = TRUE`, empty groups are dropped.
16#'
17#' @export
18grouped_df <- function(data, vars, drop = group_by_drop_default(data)) {
19  if (!is.data.frame(data)) {
20    abort("`data` must be a data frame.")
21  }
22  if (!is.character(vars)) {
23    abort("`vars` must be a character vector.")
24  }
25
26  if (length(vars) == 0) {
27    as_tibble(data)
28  } else {
29    groups <- compute_groups(data, vars, drop = drop)
30    new_grouped_df(data, groups)
31  }
32}
33
34compute_groups <- function(data, vars, drop = FALSE) {
35  unknown <- setdiff(vars, names(data))
36  if (length(unknown) > 0) {
37    vars <- paste0(encodeString(vars, quote = "`"), collapse = ", ")
38    abort(glue("`vars` missing from `data`: {vars}."))
39  }
40
41  # Only train the dictionary based on selected columns
42  group_vars <- as_tibble(data)[vars]
43  split_key_loc <- vec_split_id_order(group_vars)
44  old_keys <- split_key_loc$key
45  old_rows <- split_key_loc$loc
46
47  signal("", class = "dplyr_regroup")
48
49  groups <- tibble(!!!old_keys, ".rows" := old_rows)
50
51  if (!isTRUE(drop) && any(map_lgl(old_keys, is.factor))) {
52    # Extra work is needed to auto expand empty groups
53
54    uniques <- map(old_keys, function(.) {
55      if (is.factor(.)) . else vec_unique(.)
56    })
57
58    # Internally we only work with integers
59    #
60    # so for any grouping column that is not a factor
61    # we need to match the values to the unique values
62    positions <- map2(old_keys, uniques, function(.x, .y) {
63      if (is.factor(.x)) .x else vec_match(.x, .y)
64    })
65
66    # Expand groups internally adds empty groups recursively
67    # we get back:
68    # - indices: a list of how to vec_slice the current keys
69    #            to get the new keys
70    #
71    # - rows:    the new list of rows (i.e. the same as old rows,
72    #            but with some extra empty integer(0) added for empty groups)
73    expanded <- expand_groups(groups, positions, vec_size(old_keys))
74    new_indices <- expanded$indices
75    new_rows <- expanded$rows
76
77    # Make the new keys from the old keys and the new_indices
78    new_keys <- pmap(list(old_keys, new_indices, uniques), function(key, index, unique) {
79      if (is.factor(key)) {
80        if (is.ordered(key)) {
81          new_ordered(index, levels = levels(key))
82        } else {
83          new_factor(index, levels = levels(key))
84        }
85      } else {
86        vec_slice(unique, index)
87      }
88    })
89    names(new_keys) <- vars
90
91    groups <- tibble(!!!new_keys, ".rows" := new_rows)
92  }
93
94  attr(groups, ".drop") <- drop
95  groups
96}
97
98count_regroups <- function(code) {
99  i <- 0
100  withCallingHandlers(code, dplyr_regroup = function(cnd) {
101    i <<- i + 1
102  })
103  i
104}
105
106show_regroups <- function(code) {
107  withCallingHandlers(code, dplyr_regroup = function(cnd) {
108    cat("Regrouping...\n")
109  })
110}
111
112#' Low-level construction and validation for the grouped_df class
113#'
114#' `new_grouped_df()` is a constructor designed to be high-performance so only
115#' check types, not values. This means it is the caller's responsibility
116#' to create valid values, and hence this is for expert use only.
117#'
118#' @param x A data frame
119#' @param groups The grouped structure, `groups` should be a data frame.
120#' Its last column should be called `.rows` and be
121#' a list of 1 based integer vectors that all are between 1 and the number of rows of `.data`.
122#' @param class additional class, will be prepended to canonical classes of a grouped data frame.
123#' @param check_bounds whether to check all indices for out of bounds problems in grouped_df objects
124#' @param ... additional attributes
125#'
126#' @examples
127#' # 5 bootstrap samples
128#' tbl <- new_grouped_df(
129#'   tibble(x = rnorm(10)),
130#'   groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE))
131#' )
132#' # mean of each bootstrap sample
133#' summarise(tbl, x = mean(x))
134#'
135#' @importFrom tibble new_tibble
136#' @keywords internal
137#' @export
138new_grouped_df <- function(x, groups, ..., class = character()) {
139  if (!is.data.frame(x)) {
140    abort(c(
141      "`new_grouped_df()` incompatible argument.",
142      x = "`x` is not a data frame.")
143    )
144  }
145  if (!is.data.frame(groups) || tail(names(groups), 1L) != ".rows") {
146    abort(c(
147      "`new_grouped_df()` incompatible argument.",
148      i = "`groups` should be a data frame, and its last column be called `.rows`."
149    ))
150  }
151
152  new_tibble(
153    x,
154    groups = groups,
155    ...,
156    nrow = NROW(x),
157    class = c(class, "grouped_df")
158  )
159}
160
161#' @description
162#' `validate_grouped_df()` validates the attributes of a `grouped_df`.
163#'
164#' @rdname new_grouped_df
165#' @export
166validate_grouped_df <- function(x, check_bounds = FALSE) {
167  if (is.null(attr(x, "groups")) && !is.null(attr(x, "vars"))) {
168    abort(c(
169      "Corrupt `grouped_df` using old (< 0.8.0) format.",
170      i = "Strip off old grouping with `ungroup()`."
171    ))
172  }
173
174  result <- .Call(`dplyr_validate_grouped_df`, x, check_bounds)
175  if (!is.null(result)) {
176    abort(result)
177  }
178  x
179}
180
181setOldClass(c("grouped_df", "tbl_df", "tbl", "data.frame"))
182
183#' @rdname grouped_df
184#' @export
185is.grouped_df <- function(x) inherits(x, "grouped_df")
186#' @rdname grouped_df
187#' @export
188is_grouped_df <- is.grouped_df
189
190group_sum <- function(x) {
191  grps <- n_groups(x)
192  paste0(commas(group_vars(x)), " [", big_mark(grps), "]")
193}
194
195#' @export
196tbl_sum.grouped_df <- function(x) {
197  c(
198    NextMethod(),
199    c("Groups" = group_sum(x))
200  )
201}
202
203#' @export
204as.data.frame.grouped_df <- function(x, row.names = NULL,
205                                     optional = FALSE, ...) {
206  new_data_frame(dplyr_vec_data(x), n = nrow(x))
207}
208
209#' @export
210as_tibble.grouped_df <- function(x, ...) {
211  new_tibble(dplyr_vec_data(x), nrow = nrow(x))
212}
213
214#' @importFrom tibble is_tibble
215#' @export
216`[.grouped_df` <- function(x, i, j, drop = FALSE) {
217  out <- NextMethod()
218
219  if (!is.data.frame(out)) {
220    return(out)
221  }
222
223  if (drop) {
224    as_tibble(out)
225  } else {
226    groups <- group_intersect(x, out)
227    if ((missing(i) || nargs() == 2) && identical(groups, group_vars(x))) {
228      new_grouped_df(out, group_data(x))
229    } else {
230      grouped_df(out, groups, group_by_drop_default(x))
231    }
232  }
233}
234
235#' @export
236`$<-.grouped_df` <- function(x, name, ..., value) {
237  out <- NextMethod()
238  if (name %in% group_vars(x)) {
239    grouped_df(out, group_intersect(x, out), group_by_drop_default(x))
240  } else {
241    out
242  }
243}
244
245#' @export
246`[<-.grouped_df` <- function(x, i, j, ..., value) {
247  out <- NextMethod()
248  grouped_df(out, group_intersect(x, out), group_by_drop_default(x))
249}
250
251#' @export
252`[[<-.grouped_df` <- function(x, ..., value) {
253  out <- NextMethod()
254  grouped_df(out, group_intersect(x, out), group_by_drop_default(x))
255}
256
257#' @export
258`names<-.grouped_df` <- function(x, value) {
259  data <- as.data.frame(x)
260  names(data) <- value
261
262  groups <- group_data(x)
263  group_loc <- match(intersect(names(groups), names(x)), names(x))
264  group_names <- c(value[group_loc], ".rows")
265  if (!identical(group_names, names(groups))) {
266    names(groups) <- c(value[group_loc], ".rows")
267  }
268
269  new_grouped_df(data, groups)
270}
271
272#' @method rbind grouped_df
273#' @export
274rbind.grouped_df <- function(...) {
275  bind_rows(...)
276}
277
278#' @method cbind grouped_df
279#' @export
280cbind.grouped_df <- function(...) {
281  bind_cols(...)
282}
283
284group_data_trim <- function(group_data, preserve = FALSE) {
285  if (preserve) {
286    return(group_data)
287  }
288
289  non_empty <- lengths(group_data$".rows") > 0
290  group_data[non_empty, , drop = FALSE]
291}
292
293# Helpers -----------------------------------------------------------------
294
295expand_groups <- function(old_groups, positions, nr) {
296  .Call(`dplyr_expand_groups`, old_groups, positions, nr)
297}
298
299vec_split_id_order <- function(x) {
300  split_id <- vec_group_loc(x)
301  split_id$loc <- new_list_of(split_id$loc, ptype = integer())
302
303  vec_slice(split_id, vec_order(split_id$key))
304}
305
306group_intersect <- function(x, new) {
307  intersect(group_vars(x), names(new))
308}
309
310