1df <- data.frame(x = rep(1:3, each = 10), y = rep(1:6, each = 5))
2
3test_that("group_by with .add = TRUE adds groups", {
4  add_groups1 <- function(tbl) group_by(tbl, x, y, .add = TRUE)
5  add_groups2 <- function(tbl) group_by(group_by(tbl, x, .add = TRUE), y, .add = TRUE)
6
7  expect_equal(group_vars(add_groups1(df)), c("x", "y"))
8  expect_equal(group_vars(add_groups2(df)), c("x", "y"))
9})
10
11test_that("add = TRUE is deprecated", {
12  rlang::scoped_options(lifecycle_verbosity = "warning")
13
14  df <- tibble(x = 1, y = 2)
15
16  expect_warning(
17    out <- df %>% group_by(x) %>% group_by(y, add = TRUE),
18    "deprecated"
19  )
20  expect_equal(group_vars(out), c("x", "y"))
21})
22
23test_that("joins preserve grouping", {
24  g <- group_by(df, x)
25
26  expect_equal(group_vars(inner_join(g, g, by = c("x", "y"))), "x")
27  expect_equal(group_vars(left_join(g, g, by = c("x", "y"))), "x")
28  expect_equal(group_vars(semi_join(g, g, by = c("x", "y"))), "x")
29  expect_equal(group_vars(anti_join(g, g, by = c("x", "y"))), "x")
30})
31
32test_that("constructors drops groups", {
33  df <- data.frame(x = 1:3) %>% group_by(x)
34  expect_equal(group_vars(as_tibble(df)), character())
35})
36
37test_that("grouping by constant adds column (#410)", {
38  grouped <- group_by(mtcars, "cyl") %>% summarise(foo = n())
39  expect_equal(names(grouped), c('"cyl"', "foo"))
40  expect_equal(nrow(grouped), 1L)
41})
42
43test_that(".dots is soft deprecated", {
44  rlang::scoped_options(lifecycle_verbosity = "warning")
45
46  df <- tibble(x = 1, y = 1)
47  expect_warning(gf <- group_by(df, .dots = "x"), "deprecated")
48})
49
50# Test full range of variable types --------------------------------------------
51
52
53test_that("local group_by preserves variable types", {
54  df_var <- tibble(
55    l = c(T, F),
56    i = 1:2,
57    d = Sys.Date() + 1:2,
58    f = factor(letters[1:2]),
59    num = 1:2 + 0.5,
60    t = Sys.time() + 1:2,
61    c = letters[1:2]
62  )
63  attr(df_var$t, "tzone") <- ""
64
65  for (var in names(df_var)) {
66    expected <- tibble(!!var := sort(unique(df_var[[var]])), n = 1L)
67
68    summarised <- df_var %>% group_by(!!sym(var)) %>% summarise(n = n())
69    expect_equal(summarised, expected)
70  }
71})
72
73test_that("mutate does not loose variables (#144)", {
74  df <- tibble(a = rep(1:4, 2), b = rep(1:4, each = 2), x = runif(8))
75  by_ab <- group_by(df, a, b)
76  by_a <- summarise(by_ab, x = sum(x), .groups = "drop_last")
77  by_a_quartile <- group_by(by_a, quartile = ntile(x, 4))
78
79  expect_equal(names(by_a_quartile), c("a", "b", "x", "quartile"))
80})
81
82test_that("group_by uses shallow copy", {
83  m1 <- group_by(mtcars, cyl)
84  expect_equal(group_vars(mtcars), character())
85
86  expect_equal(
87    lobstr::obj_addrs(mtcars),
88    lobstr::obj_addrs(m1)
89  )
90})
91
92test_that("group_by orders by groups. #242", {
93  df <- data.frame(a = sample(1:10, 3000, replace = TRUE)) %>% group_by(a)
94  expect_equal(group_data(df)$a, 1:10)
95
96  df <- data.frame(a = sample(letters[1:10], 3000, replace = TRUE), stringsAsFactors = FALSE) %>% group_by(a)
97  expect_equal(group_data(df)$a, letters[1:10])
98
99  df <- data.frame(a = sample(sqrt(1:10), 3000, replace = TRUE)) %>% group_by(a)
100  expect_equal(group_data(df)$a, sqrt(1:10))
101})
102
103test_that("Can group_by() a POSIXlt", {
104  skip_if_not_installed("tibble", "2.99.99")
105  df <- tibble(x = 1:5, times = as.POSIXlt(seq.Date(Sys.Date(), length.out = 5, by = "day")))
106  g <- group_by(df, times)
107  expect_equal(nrow(group_data(g)), 5L)
108})
109
110test_that("group_by() handles list as grouping variables", {
111  df <- tibble(x = 1:3, y = list(1:2, 1:3, 1:2))
112  gdata <- group_data(group_by(df, y))
113  expect_equal(nrow(gdata), 2L)
114  expect_equal(gdata$y, list(1:2, 1:3))
115  expect_equal(gdata$.rows, list_of(c(1L, 3L), 2L))
116})
117
118test_that("select(group_by(.)) implicitely adds grouping variables (#170)", {
119  expect_snapshot(
120    res <- mtcars %>% group_by(vs) %>% select(mpg)
121  )
122  expect_equal(names(res), c("vs", "mpg"))
123})
124
125test_that("group_by only creates one group for NA (#401)", {
126  x <- as.numeric(c(NA, NA, NA, 10:1, 10:1))
127  w <- c(20, 30, 40, 1:10, 1:10) * 10
128
129  n_distinct(x) # 11 OK
130  res <- data.frame(x = x, w = w) %>% group_by(x) %>% summarise(n = n())
131  expect_equal(nrow(res), 11L)
132})
133
134test_that("there can be 0 groups (#486)", {
135  data <- tibble(a = numeric(0), g = character(0)) %>% group_by(g)
136  expect_equal(length(data$a), 0L)
137  expect_equal(length(data$g), 0L)
138  expect_equal(map_int(group_rows(data), length), integer(0))
139})
140
141test_that("group_by works with zero-row data frames (#486)", {
142  df <- data.frame(a = numeric(0), b = numeric(0), g = character(0))
143  dfg <- group_by(df, g, .drop = FALSE)
144  expect_equal(dim(dfg), c(0, 3))
145  expect_equal(group_vars(dfg), "g")
146  expect_equal(group_size(dfg), integer(0))
147
148  x <- summarise(dfg, n = n())
149  expect_equal(dim(x), c(0, 2))
150  expect_equal(group_vars(x), character())
151
152  x <- mutate(dfg, c = b + 1)
153  expect_equal(dim(x), c(0, 4))
154  expect_equal(group_vars(x), "g")
155  expect_equal(group_size(x), integer(0))
156
157  x <- filter(dfg, a == 100)
158  expect_equal(dim(x), c(0, 3))
159  expect_equal(group_vars(x), "g")
160  expect_equal(group_size(x), integer(0))
161
162  x <- arrange(dfg, a, g)
163  expect_equal(dim(x), c(0, 3))
164  expect_equal(group_vars(x), "g")
165  expect_equal(group_size(x), integer(0))
166
167  expect_snapshot(
168    x <- select(dfg, a) # Only select 'a' column; should result in 'g' and 'a'
169  )
170  expect_equal(dim(x), c(0, 2))
171  expect_equal(group_vars(x), "g")
172  expect_equal(group_size(x), integer(0))
173})
174
175test_that("[ on grouped_df preserves grouping if subset includes grouping vars", {
176  df <- tibble(x = 1:5, ` ` = 6:10)
177  by_x <- df %>% group_by(x)
178  expect_equal(by_x %>% groups(), by_x %>% `[`(1:2) %>% groups())
179
180  # non-syntactic name
181  by_ns <- df %>% group_by(` `)
182  expect_equal(by_ns %>% groups(), by_ns %>% `[`(1:2) %>% groups())
183})
184
185test_that("[ on grouped_df drops grouping if subset doesn't include grouping vars", {
186  by_cyl <- mtcars %>% group_by(cyl)
187  no_cyl <- by_cyl %>% `[`(c(1, 3))
188
189  expect_equal(group_vars(no_cyl), character())
190  expect_s3_class(no_cyl, "tbl_df")
191})
192
193test_that("group_by works after arrange (#959)", {
194  df <- tibble(Log = c(1, 2, 1, 2, 1, 2), Time = c(10, 1, 3, 0, 15, 11))
195  res <- df %>%
196    arrange(Time) %>%
197    group_by(Log) %>%
198    mutate(Diff = Time - lag(Time))
199  expect_true(all(is.na(res$Diff[c(1, 3)])))
200  expect_equal(res$Diff[c(2, 4, 5, 6)], c(1, 7, 10, 5))
201})
202
203test_that("group_by keeps attributes", {
204  d <- data.frame(x = structure(1:10, foo = "bar"))
205  gd <- group_by(d)
206  expect_equal(attr(gd$x, "foo"), "bar")
207})
208
209test_that("ungroup.rowwise_df gives a tbl_df (#936)", {
210  res <- mtcars %>% rowwise() %>% ungroup() %>% class()
211  expect_equal(res, c("tbl_df", "tbl", "data.frame"))
212})
213
214test_that(paste0("group_by handles encodings for native strings (#1507)"), {
215  local_non_utf8_encoding()
216
217  special <- get_native_lang_string()
218
219  df <- data.frame(x = 1:3, Eng = 2:4)
220
221  for (names_converter in c(enc2native, enc2utf8)) {
222    for (dots_converter in c(enc2native, enc2utf8)) {
223      names(df) <- names_converter(c(special, "Eng"))
224      res <- group_by(df, !!!syms(dots_converter(special)))
225      expect_equal(names(res), names(df))
226      expect_equal(group_vars(res), special)
227    }
228  }
229
230  for (names_converter in c(enc2native, enc2utf8)) {
231    names(df) <- names_converter(c(special, "Eng"))
232
233    res <- group_by(df, !!!special)
234    expect_equal(names(res), c(names(df), deparse(special)))
235    expect_equal(groups(res), list(as.name(enc2native(deparse(special)))))
236  }
237})
238
239test_that("group_by handles raw columns (#1803)", {
240  df <- tibble(a = 1:3, b = as.raw(1:3))
241  expect_identical(ungroup(group_by(df, a)), df)
242  expect_identical(ungroup(group_by(df, b)), df)
243})
244
245test_that("rowwise handles raw columns (#1803)", {
246  df <- tibble(a = 1:3, b = as.raw(1:3))
247  expect_s3_class(rowwise(df), "rowwise_df")
248})
249
250test_that("group_by() names pronouns correctly (#2686)", {
251  expect_named(group_by(tibble(x = 1), .data$x), "x")
252  expect_named(group_by(tibble(x = 1), .data[["x"]]), "x")
253})
254
255test_that("group_by() does not affect input data (#3028)", {
256  x <-
257    data.frame(old1 = c(1, 2, 3), old2 = c(4, 5, 6)) %>%
258    group_by(old1)
259
260  y <-
261    x %>%
262    select(new1 = old1, new2 = old2)
263
264  expect_identical(groups(x), syms(quote(old1)))
265})
266
267test_that("group_by() does not mutate for nothing when using the .data pronoun (#2752, #3533)", {
268  expect_identical(
269    iris %>% group_by(Species) %>% group_by(.data$Species),
270    iris %>% group_by(Species)
271  )
272  expect_identical(
273    iris %>% group_by(Species) %>% group_by(.data[["Species"]]),
274    iris %>% group_by(Species)
275  )
276
277  df <- tibble(x = 1:5)
278  attr(df, "y") <- 1
279
280  expect_equal( df %>% group_by(.data$x) %>% attr("y"), 1 )
281  expect_equal( df %>% group_by(.data[["x"]]) %>% attr("y"), 1 )
282})
283
284test_that("tbl_sum gets the right number of groups", {
285  res <- data.frame(x=c(1,1,2,2)) %>% group_by(x) %>% tbl_sum()
286  expect_equal(res, c("A tibble" = "4 x 1", "Groups" = "x [2]"))
287})
288
289test_that("group_by ignores empty quosures (3780)", {
290  empty <- quo()
291  expect_equal(group_by(mtcars, cyl), group_by(mtcars, cyl, !!empty))
292})
293
294# Zero groups ---------------------------------------------------
295
296test_that("mutate handles grouped tibble with 0 groups (#3935)", {
297  df <- tibble(x=integer()) %>% group_by(x)
298  res <- mutate(df, y = mean(x), z = +mean(x), n = n())
299  expect_equal(names(res), c("x", "y", "z", "n"))
300  expect_equal(nrow(res), 0L)
301  expect_equal(res$y, double())
302  expect_equal(res$z, double())
303  expect_equal(res$n, integer())
304})
305
306test_that("summarise handles grouped tibble with 0 groups (#3935)", {
307  df <- tibble(x=integer()) %>% group_by(x)
308  res <- summarise(df, y = mean(x), z = +mean(x), n = n())
309  expect_equal(names(res), c("x", "y", "z", "n"))
310  expect_equal(nrow(res), 0L)
311  expect_equal(res$y, double())
312  expect_equal(res$n, integer())
313  expect_equal(res$z, double())
314})
315
316test_that("filter handles grouped tibble with 0 groups (#3935)", {
317  df <- tibble(x=integer()) %>% group_by(x)
318  res <- filter(df, x > 3L)
319  expect_identical(df, res)
320})
321
322test_that("select handles grouped tibble with 0 groups (#3935)", {
323  df <- tibble(x=integer()) %>% group_by(x)
324  res <- select(df, x)
325  expect_identical(df, res)
326})
327
328test_that("arrange handles grouped tibble with 0 groups (#3935)", {
329  df <- tibble(x=integer()) %>% group_by(x)
330  res <- arrange(df, x)
331  expect_identical(df, res)
332})
333
334test_that("group_by() with empty spec produces a grouped data frame with 0 grouping variables", {
335  gdata <- group_data(group_by(iris))
336  expect_equal(names(gdata), ".rows")
337  expect_equal(gdata$.rows, list_of(1:nrow(iris)))
338
339  gdata <- group_data(group_by(iris, !!!list()))
340  expect_equal(names(gdata), ".rows")
341  expect_equal(gdata$.rows, list_of(1:nrow(iris)))
342})
343
344# .drop = TRUE ---------------------------------------------------
345
346test_that("group_by(.drop = TRUE) drops empty groups (4061)", {
347  res <- iris %>%
348    filter(Species == "setosa") %>%
349    group_by(Species, .drop = TRUE)
350
351  expect_identical(
352    group_data(res),
353    structure(
354      tibble(Species = factor("setosa", levels = levels(iris$Species)), .rows := list_of(1:50)),
355      .drop = TRUE
356    )
357  )
358
359  expect_true(group_by_drop_default(res))
360})
361
362test_that("grouped data frames remember their .drop (#4061)", {
363  res <- iris %>%
364    filter(Species == "setosa") %>%
365    group_by(Species, .drop = TRUE)
366
367  res2 <- res %>%
368    filter(Sepal.Length > 5)
369  expect_true(group_by_drop_default(res2))
370
371  res3 <- res %>%
372    filter(Sepal.Length > 5, .preserve = FALSE)
373  expect_true(group_by_drop_default(res3))
374
375  res4 <- res3 %>%
376    group_by(Species)
377  expect_true(group_by_drop_default(res4))
378  expect_equal(nrow(group_data(res4)), 1L)
379})
380
381test_that("grouped data frames remember their .drop = FALSE (#4337)", {
382  res <- iris %>%
383    filter(Species == "setosa") %>%
384    group_by(Species, .drop = FALSE)
385  expect_false(group_by_drop_default(res))
386
387  res2 <- res %>%
388    group_by(Species)
389  expect_false(group_by_drop_default(res2))
390})
391
392test_that("group_by(.drop = FALSE) preserve ordered factors (#5455)", {
393  df <- tibble(x = ordered("x"))
394  drop <- df %>% group_by(x) %>% group_data()
395  nodrop <- df %>% group_by(x, .drop = FALSE) %>% group_data()
396
397  expect_equal(is.ordered(drop$x), is.ordered(nodrop$x))
398  expect_true(is.ordered(nodrop$x))
399})
400
401test_that("summarise maintains the .drop attribute (#4061)", {
402  df <- tibble(
403    f1 = factor("a", levels = c("a", "b", "c")),
404    f2 = factor("d", levels = c("d", "e", "f", "g")),
405    x  = 42
406  )
407
408  res <- df %>%
409    group_by(f1, f2, .drop = TRUE)
410  expect_equal(n_groups(res), 1L)
411
412  res2 <- summarise(res, x = sum(x), .groups = "drop_last")
413  expect_equal(n_groups(res2), 1L)
414  expect_true(group_by_drop_default(res2))
415})
416
417test_that("joins maintain the .drop attribute (#4061)", {
418  df1 <- group_by(tibble(
419    f1 = factor(c("a", "b"), levels = c("a", "b", "c")),
420    x  = 42:43
421  ), f1, .drop = TRUE)
422
423  df2 <- group_by(tibble(
424    f1 = factor(c("a"), levels = c("a", "b", "c")),
425    y = 1
426  ), f1, .drop = TRUE)
427
428  res <- left_join(df1, df2, by = "f1")
429  expect_equal(n_groups(res), 2L)
430
431  df2 <- group_by(tibble(
432    f1 = factor(c("a", "c"), levels = c("a", "b", "c")),
433    y = 1:2
434  ), f1, .drop = TRUE)
435  res <- full_join(df1, df2, by = "f1")
436  expect_equal(n_groups(res), 3L)
437})
438
439test_that("group_by(add = TRUE) sets .drop if the origonal data was .drop", {
440  d <- tibble(
441    f1 = factor("b", levels = c("a", "b", "c")),
442    f2 = factor("g", levels = c("e", "f", "g")),
443    x  = 48
444  )
445
446  res <- group_by(group_by(d, f1, .drop = TRUE), f2, .add = TRUE)
447  expect_equal(n_groups(res), 1L)
448  expect_true(group_by_drop_default(res))
449})
450
451test_that("group_by_drop_default() is forgiving about corrupt grouped df (#4306)",{
452  df <- tibble(x = 1:2, y = 1:2) %>%
453    structure(class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
454
455  expect_true(group_by_drop_default(df))
456})
457
458test_that("group_by() puts NA groups last in STRSXP (#4227)", {
459  res <- tibble(x = c("apple", NA, "banana"), y = 1:3) %>%
460    group_by(x) %>%
461    group_data()
462  expect_identical(res$x, c("apple", "banana", NA_character_))
463  expect_identical(res$.rows, list_of(1L, 3L, 2L))
464})
465
466test_that("group_by() does not create arbitrary NA groups for factors when drop = TRUE (#4460)", {
467  res <- expect_warning(group_data(group_by(iris, Species)[0, ]), NA)
468  expect_equal(nrow(res), 0L)
469
470  res <- expect_warning(group_data(group_by(iris[0, ], Species)), NA)
471  expect_equal(nrow(res), 0L)
472})
473
474test_that("group_by() can handle auto splicing in the mutate() step", {
475  expect_identical(
476    iris %>% group_by(Species),
477    iris %>% group_by(data.frame(Species = Species))
478  )
479
480  expect_identical(
481    iris %>% group_by(Species),
482    iris %>% group_by(across(Species))
483  )
484
485  expect_identical(
486    iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width),
487    iris %>% group_by(across(starts_with("Sepal"), round))
488  )
489
490  df <- tibble(x = c(1, 2), y = c(1, 2))
491  expect_identical(df %>% group_by(across(character())), df)
492  expect_identical(df %>% group_by(across(NULL)), df)
493
494  expect_identical(df %>% group_by(x) %>% group_by(across(character())), df)
495  expect_identical(df %>% group_by(x) %>% group_by(across(NULL)), df)
496})
497
498test_that("group_by() can combine usual spec and auto-splicing-mutate() step", {
499  expect_identical(
500    iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width, Species),
501    iris %>% group_by(across(starts_with("Sepal"), round), Species)
502  )
503
504  expect_identical(
505    iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Species, Sepal.Length, Sepal.Width),
506    iris %>% group_by(Species, across(starts_with("Sepal"), round))
507  )
508})
509
510# mutate() semantics
511
512test_that("group_by() has mutate() semantics (#4984)", {
513  expect_equal(
514    tibble(a = 1, b = 2) %>% group_by(c = a * b, d = c + 1),
515    tibble(a = 1, b = 2) %>% mutate(c = a * b, d = c + 1) %>% group_by(c, d)
516  )
517})
518
519test_that("implicit mutate() operates on ungrouped data (#5598)", {
520  vars <- tibble(x = c(1,2), y = c(3,4), z = c(5,6)) %>%
521    dplyr::group_by(y) %>%
522    dplyr::group_by(across(any_of(c('y','z')))) %>%
523    dplyr::group_vars()
524  expect_equal(vars, c("y", "z"))
525})
526
527test_that("grouped_df() does not break row.names (#5745)", {
528  groups <- compute_groups(data.frame(x = 1:10), "x")
529  expect_equal(.row_names_info(groups, type = 0), c(NA, -10L))
530})
531
532test_that("group_by() keeps attributes unrelated to the grouping (#5760)", {
533  d <- data.frame(x = 453, y = 642)
534  attr(d, "foo") <- "bar"
535
536  d2 <- group_by(d, x)
537  expect_equal(attr(d2, "foo"), "bar")
538
539  d3 <- group_by(d2, y, .add = TRUE)
540  expect_equal(attr(d2, "foo"), "bar")
541
542  d4 <- group_by(d2, y2 = y * 2, .add = TRUE)
543  expect_equal(attr(d2, "foo"), "bar")
544})
545
546test_that("group_by() works with quosures (tidyverse/lubridate#959)", {
547  ignore <- function(...) NA
548  f <- function(var) {
549    tibble(x = 1) %>% group_by(g = ignore({{ var }}))
550  }
551  g <- function(var) {
552    # This used to fail with the extra argument
553    tibble(x = 1) %>% group_by(g = ignore({{ var }}, 1))
554  }
555  expect_equal(f(), tibble(x = 1, g = NA) %>% group_by(g))
556  expect_equal(g(), tibble(x = 1, g = NA) %>% group_by(g))
557})
558
559test_that("group_by() propagates caller env", {
560  expect_caller_env(group_by(mtcars, sig_caller_env()))
561})
562
563
564# Errors ------------------------------------------------------------------
565
566test_that("group_by() and ungroup() give meaningful error messages", {
567  df <- tibble(x = 1, y = 2)
568
569  expect_snapshot(error = TRUE, df %>% group_by(unknown))
570  expect_snapshot(error = TRUE, df %>% ungroup(x))
571  expect_snapshot(error = TRUE, df %>% group_by(x, y) %>% ungroup(z))
572
573  expect_snapshot(error = TRUE, df %>% group_by(z = a + 1))
574})
575