1df <- data.frame(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) 2 3test_that("group_by with .add = TRUE adds groups", { 4 add_groups1 <- function(tbl) group_by(tbl, x, y, .add = TRUE) 5 add_groups2 <- function(tbl) group_by(group_by(tbl, x, .add = TRUE), y, .add = TRUE) 6 7 expect_equal(group_vars(add_groups1(df)), c("x", "y")) 8 expect_equal(group_vars(add_groups2(df)), c("x", "y")) 9}) 10 11test_that("add = TRUE is deprecated", { 12 rlang::scoped_options(lifecycle_verbosity = "warning") 13 14 df <- tibble(x = 1, y = 2) 15 16 expect_warning( 17 out <- df %>% group_by(x) %>% group_by(y, add = TRUE), 18 "deprecated" 19 ) 20 expect_equal(group_vars(out), c("x", "y")) 21}) 22 23test_that("joins preserve grouping", { 24 g <- group_by(df, x) 25 26 expect_equal(group_vars(inner_join(g, g, by = c("x", "y"))), "x") 27 expect_equal(group_vars(left_join(g, g, by = c("x", "y"))), "x") 28 expect_equal(group_vars(semi_join(g, g, by = c("x", "y"))), "x") 29 expect_equal(group_vars(anti_join(g, g, by = c("x", "y"))), "x") 30}) 31 32test_that("constructors drops groups", { 33 df <- data.frame(x = 1:3) %>% group_by(x) 34 expect_equal(group_vars(as_tibble(df)), character()) 35}) 36 37test_that("grouping by constant adds column (#410)", { 38 grouped <- group_by(mtcars, "cyl") %>% summarise(foo = n()) 39 expect_equal(names(grouped), c('"cyl"', "foo")) 40 expect_equal(nrow(grouped), 1L) 41}) 42 43test_that(".dots is soft deprecated", { 44 rlang::scoped_options(lifecycle_verbosity = "warning") 45 46 df <- tibble(x = 1, y = 1) 47 expect_warning(gf <- group_by(df, .dots = "x"), "deprecated") 48}) 49 50# Test full range of variable types -------------------------------------------- 51 52 53test_that("local group_by preserves variable types", { 54 df_var <- tibble( 55 l = c(T, F), 56 i = 1:2, 57 d = Sys.Date() + 1:2, 58 f = factor(letters[1:2]), 59 num = 1:2 + 0.5, 60 t = Sys.time() + 1:2, 61 c = letters[1:2] 62 ) 63 attr(df_var$t, "tzone") <- "" 64 65 for (var in names(df_var)) { 66 expected <- tibble(!!var := sort(unique(df_var[[var]])), n = 1L) 67 68 summarised <- df_var %>% group_by(!!sym(var)) %>% summarise(n = n()) 69 expect_equal(summarised, expected) 70 } 71}) 72 73test_that("mutate does not loose variables (#144)", { 74 df <- tibble(a = rep(1:4, 2), b = rep(1:4, each = 2), x = runif(8)) 75 by_ab <- group_by(df, a, b) 76 by_a <- summarise(by_ab, x = sum(x), .groups = "drop_last") 77 by_a_quartile <- group_by(by_a, quartile = ntile(x, 4)) 78 79 expect_equal(names(by_a_quartile), c("a", "b", "x", "quartile")) 80}) 81 82test_that("group_by uses shallow copy", { 83 m1 <- group_by(mtcars, cyl) 84 expect_equal(group_vars(mtcars), character()) 85 86 expect_equal( 87 lobstr::obj_addrs(mtcars), 88 lobstr::obj_addrs(m1) 89 ) 90}) 91 92test_that("group_by orders by groups. #242", { 93 df <- data.frame(a = sample(1:10, 3000, replace = TRUE)) %>% group_by(a) 94 expect_equal(group_data(df)$a, 1:10) 95 96 df <- data.frame(a = sample(letters[1:10], 3000, replace = TRUE), stringsAsFactors = FALSE) %>% group_by(a) 97 expect_equal(group_data(df)$a, letters[1:10]) 98 99 df <- data.frame(a = sample(sqrt(1:10), 3000, replace = TRUE)) %>% group_by(a) 100 expect_equal(group_data(df)$a, sqrt(1:10)) 101}) 102 103test_that("Can group_by() a POSIXlt", { 104 skip_if_not_installed("tibble", "2.99.99") 105 df <- tibble(x = 1:5, times = as.POSIXlt(seq.Date(Sys.Date(), length.out = 5, by = "day"))) 106 g <- group_by(df, times) 107 expect_equal(nrow(group_data(g)), 5L) 108}) 109 110test_that("group_by() handles list as grouping variables", { 111 df <- tibble(x = 1:3, y = list(1:2, 1:3, 1:2)) 112 gdata <- group_data(group_by(df, y)) 113 expect_equal(nrow(gdata), 2L) 114 expect_equal(gdata$y, list(1:2, 1:3)) 115 expect_equal(gdata$.rows, list_of(c(1L, 3L), 2L)) 116}) 117 118test_that("select(group_by(.)) implicitely adds grouping variables (#170)", { 119 expect_snapshot( 120 res <- mtcars %>% group_by(vs) %>% select(mpg) 121 ) 122 expect_equal(names(res), c("vs", "mpg")) 123}) 124 125test_that("group_by only creates one group for NA (#401)", { 126 x <- as.numeric(c(NA, NA, NA, 10:1, 10:1)) 127 w <- c(20, 30, 40, 1:10, 1:10) * 10 128 129 n_distinct(x) # 11 OK 130 res <- data.frame(x = x, w = w) %>% group_by(x) %>% summarise(n = n()) 131 expect_equal(nrow(res), 11L) 132}) 133 134test_that("there can be 0 groups (#486)", { 135 data <- tibble(a = numeric(0), g = character(0)) %>% group_by(g) 136 expect_equal(length(data$a), 0L) 137 expect_equal(length(data$g), 0L) 138 expect_equal(map_int(group_rows(data), length), integer(0)) 139}) 140 141test_that("group_by works with zero-row data frames (#486)", { 142 df <- data.frame(a = numeric(0), b = numeric(0), g = character(0)) 143 dfg <- group_by(df, g, .drop = FALSE) 144 expect_equal(dim(dfg), c(0, 3)) 145 expect_equal(group_vars(dfg), "g") 146 expect_equal(group_size(dfg), integer(0)) 147 148 x <- summarise(dfg, n = n()) 149 expect_equal(dim(x), c(0, 2)) 150 expect_equal(group_vars(x), character()) 151 152 x <- mutate(dfg, c = b + 1) 153 expect_equal(dim(x), c(0, 4)) 154 expect_equal(group_vars(x), "g") 155 expect_equal(group_size(x), integer(0)) 156 157 x <- filter(dfg, a == 100) 158 expect_equal(dim(x), c(0, 3)) 159 expect_equal(group_vars(x), "g") 160 expect_equal(group_size(x), integer(0)) 161 162 x <- arrange(dfg, a, g) 163 expect_equal(dim(x), c(0, 3)) 164 expect_equal(group_vars(x), "g") 165 expect_equal(group_size(x), integer(0)) 166 167 expect_snapshot( 168 x <- select(dfg, a) # Only select 'a' column; should result in 'g' and 'a' 169 ) 170 expect_equal(dim(x), c(0, 2)) 171 expect_equal(group_vars(x), "g") 172 expect_equal(group_size(x), integer(0)) 173}) 174 175test_that("[ on grouped_df preserves grouping if subset includes grouping vars", { 176 df <- tibble(x = 1:5, ` ` = 6:10) 177 by_x <- df %>% group_by(x) 178 expect_equal(by_x %>% groups(), by_x %>% `[`(1:2) %>% groups()) 179 180 # non-syntactic name 181 by_ns <- df %>% group_by(` `) 182 expect_equal(by_ns %>% groups(), by_ns %>% `[`(1:2) %>% groups()) 183}) 184 185test_that("[ on grouped_df drops grouping if subset doesn't include grouping vars", { 186 by_cyl <- mtcars %>% group_by(cyl) 187 no_cyl <- by_cyl %>% `[`(c(1, 3)) 188 189 expect_equal(group_vars(no_cyl), character()) 190 expect_s3_class(no_cyl, "tbl_df") 191}) 192 193test_that("group_by works after arrange (#959)", { 194 df <- tibble(Log = c(1, 2, 1, 2, 1, 2), Time = c(10, 1, 3, 0, 15, 11)) 195 res <- df %>% 196 arrange(Time) %>% 197 group_by(Log) %>% 198 mutate(Diff = Time - lag(Time)) 199 expect_true(all(is.na(res$Diff[c(1, 3)]))) 200 expect_equal(res$Diff[c(2, 4, 5, 6)], c(1, 7, 10, 5)) 201}) 202 203test_that("group_by keeps attributes", { 204 d <- data.frame(x = structure(1:10, foo = "bar")) 205 gd <- group_by(d) 206 expect_equal(attr(gd$x, "foo"), "bar") 207}) 208 209test_that("ungroup.rowwise_df gives a tbl_df (#936)", { 210 res <- mtcars %>% rowwise() %>% ungroup() %>% class() 211 expect_equal(res, c("tbl_df", "tbl", "data.frame")) 212}) 213 214test_that(paste0("group_by handles encodings for native strings (#1507)"), { 215 local_non_utf8_encoding() 216 217 special <- get_native_lang_string() 218 219 df <- data.frame(x = 1:3, Eng = 2:4) 220 221 for (names_converter in c(enc2native, enc2utf8)) { 222 for (dots_converter in c(enc2native, enc2utf8)) { 223 names(df) <- names_converter(c(special, "Eng")) 224 res <- group_by(df, !!!syms(dots_converter(special))) 225 expect_equal(names(res), names(df)) 226 expect_equal(group_vars(res), special) 227 } 228 } 229 230 for (names_converter in c(enc2native, enc2utf8)) { 231 names(df) <- names_converter(c(special, "Eng")) 232 233 res <- group_by(df, !!!special) 234 expect_equal(names(res), c(names(df), deparse(special))) 235 expect_equal(groups(res), list(as.name(enc2native(deparse(special))))) 236 } 237}) 238 239test_that("group_by handles raw columns (#1803)", { 240 df <- tibble(a = 1:3, b = as.raw(1:3)) 241 expect_identical(ungroup(group_by(df, a)), df) 242 expect_identical(ungroup(group_by(df, b)), df) 243}) 244 245test_that("rowwise handles raw columns (#1803)", { 246 df <- tibble(a = 1:3, b = as.raw(1:3)) 247 expect_s3_class(rowwise(df), "rowwise_df") 248}) 249 250test_that("group_by() names pronouns correctly (#2686)", { 251 expect_named(group_by(tibble(x = 1), .data$x), "x") 252 expect_named(group_by(tibble(x = 1), .data[["x"]]), "x") 253}) 254 255test_that("group_by() does not affect input data (#3028)", { 256 x <- 257 data.frame(old1 = c(1, 2, 3), old2 = c(4, 5, 6)) %>% 258 group_by(old1) 259 260 y <- 261 x %>% 262 select(new1 = old1, new2 = old2) 263 264 expect_identical(groups(x), syms(quote(old1))) 265}) 266 267test_that("group_by() does not mutate for nothing when using the .data pronoun (#2752, #3533)", { 268 expect_identical( 269 iris %>% group_by(Species) %>% group_by(.data$Species), 270 iris %>% group_by(Species) 271 ) 272 expect_identical( 273 iris %>% group_by(Species) %>% group_by(.data[["Species"]]), 274 iris %>% group_by(Species) 275 ) 276 277 df <- tibble(x = 1:5) 278 attr(df, "y") <- 1 279 280 expect_equal( df %>% group_by(.data$x) %>% attr("y"), 1 ) 281 expect_equal( df %>% group_by(.data[["x"]]) %>% attr("y"), 1 ) 282}) 283 284test_that("tbl_sum gets the right number of groups", { 285 res <- data.frame(x=c(1,1,2,2)) %>% group_by(x) %>% tbl_sum() 286 expect_equal(res, c("A tibble" = "4 x 1", "Groups" = "x [2]")) 287}) 288 289test_that("group_by ignores empty quosures (3780)", { 290 empty <- quo() 291 expect_equal(group_by(mtcars, cyl), group_by(mtcars, cyl, !!empty)) 292}) 293 294# Zero groups --------------------------------------------------- 295 296test_that("mutate handles grouped tibble with 0 groups (#3935)", { 297 df <- tibble(x=integer()) %>% group_by(x) 298 res <- mutate(df, y = mean(x), z = +mean(x), n = n()) 299 expect_equal(names(res), c("x", "y", "z", "n")) 300 expect_equal(nrow(res), 0L) 301 expect_equal(res$y, double()) 302 expect_equal(res$z, double()) 303 expect_equal(res$n, integer()) 304}) 305 306test_that("summarise handles grouped tibble with 0 groups (#3935)", { 307 df <- tibble(x=integer()) %>% group_by(x) 308 res <- summarise(df, y = mean(x), z = +mean(x), n = n()) 309 expect_equal(names(res), c("x", "y", "z", "n")) 310 expect_equal(nrow(res), 0L) 311 expect_equal(res$y, double()) 312 expect_equal(res$n, integer()) 313 expect_equal(res$z, double()) 314}) 315 316test_that("filter handles grouped tibble with 0 groups (#3935)", { 317 df <- tibble(x=integer()) %>% group_by(x) 318 res <- filter(df, x > 3L) 319 expect_identical(df, res) 320}) 321 322test_that("select handles grouped tibble with 0 groups (#3935)", { 323 df <- tibble(x=integer()) %>% group_by(x) 324 res <- select(df, x) 325 expect_identical(df, res) 326}) 327 328test_that("arrange handles grouped tibble with 0 groups (#3935)", { 329 df <- tibble(x=integer()) %>% group_by(x) 330 res <- arrange(df, x) 331 expect_identical(df, res) 332}) 333 334test_that("group_by() with empty spec produces a grouped data frame with 0 grouping variables", { 335 gdata <- group_data(group_by(iris)) 336 expect_equal(names(gdata), ".rows") 337 expect_equal(gdata$.rows, list_of(1:nrow(iris))) 338 339 gdata <- group_data(group_by(iris, !!!list())) 340 expect_equal(names(gdata), ".rows") 341 expect_equal(gdata$.rows, list_of(1:nrow(iris))) 342}) 343 344# .drop = TRUE --------------------------------------------------- 345 346test_that("group_by(.drop = TRUE) drops empty groups (4061)", { 347 res <- iris %>% 348 filter(Species == "setosa") %>% 349 group_by(Species, .drop = TRUE) 350 351 expect_identical( 352 group_data(res), 353 structure( 354 tibble(Species = factor("setosa", levels = levels(iris$Species)), .rows := list_of(1:50)), 355 .drop = TRUE 356 ) 357 ) 358 359 expect_true(group_by_drop_default(res)) 360}) 361 362test_that("grouped data frames remember their .drop (#4061)", { 363 res <- iris %>% 364 filter(Species == "setosa") %>% 365 group_by(Species, .drop = TRUE) 366 367 res2 <- res %>% 368 filter(Sepal.Length > 5) 369 expect_true(group_by_drop_default(res2)) 370 371 res3 <- res %>% 372 filter(Sepal.Length > 5, .preserve = FALSE) 373 expect_true(group_by_drop_default(res3)) 374 375 res4 <- res3 %>% 376 group_by(Species) 377 expect_true(group_by_drop_default(res4)) 378 expect_equal(nrow(group_data(res4)), 1L) 379}) 380 381test_that("grouped data frames remember their .drop = FALSE (#4337)", { 382 res <- iris %>% 383 filter(Species == "setosa") %>% 384 group_by(Species, .drop = FALSE) 385 expect_false(group_by_drop_default(res)) 386 387 res2 <- res %>% 388 group_by(Species) 389 expect_false(group_by_drop_default(res2)) 390}) 391 392test_that("group_by(.drop = FALSE) preserve ordered factors (#5455)", { 393 df <- tibble(x = ordered("x")) 394 drop <- df %>% group_by(x) %>% group_data() 395 nodrop <- df %>% group_by(x, .drop = FALSE) %>% group_data() 396 397 expect_equal(is.ordered(drop$x), is.ordered(nodrop$x)) 398 expect_true(is.ordered(nodrop$x)) 399}) 400 401test_that("summarise maintains the .drop attribute (#4061)", { 402 df <- tibble( 403 f1 = factor("a", levels = c("a", "b", "c")), 404 f2 = factor("d", levels = c("d", "e", "f", "g")), 405 x = 42 406 ) 407 408 res <- df %>% 409 group_by(f1, f2, .drop = TRUE) 410 expect_equal(n_groups(res), 1L) 411 412 res2 <- summarise(res, x = sum(x), .groups = "drop_last") 413 expect_equal(n_groups(res2), 1L) 414 expect_true(group_by_drop_default(res2)) 415}) 416 417test_that("joins maintain the .drop attribute (#4061)", { 418 df1 <- group_by(tibble( 419 f1 = factor(c("a", "b"), levels = c("a", "b", "c")), 420 x = 42:43 421 ), f1, .drop = TRUE) 422 423 df2 <- group_by(tibble( 424 f1 = factor(c("a"), levels = c("a", "b", "c")), 425 y = 1 426 ), f1, .drop = TRUE) 427 428 res <- left_join(df1, df2, by = "f1") 429 expect_equal(n_groups(res), 2L) 430 431 df2 <- group_by(tibble( 432 f1 = factor(c("a", "c"), levels = c("a", "b", "c")), 433 y = 1:2 434 ), f1, .drop = TRUE) 435 res <- full_join(df1, df2, by = "f1") 436 expect_equal(n_groups(res), 3L) 437}) 438 439test_that("group_by(add = TRUE) sets .drop if the origonal data was .drop", { 440 d <- tibble( 441 f1 = factor("b", levels = c("a", "b", "c")), 442 f2 = factor("g", levels = c("e", "f", "g")), 443 x = 48 444 ) 445 446 res <- group_by(group_by(d, f1, .drop = TRUE), f2, .add = TRUE) 447 expect_equal(n_groups(res), 1L) 448 expect_true(group_by_drop_default(res)) 449}) 450 451test_that("group_by_drop_default() is forgiving about corrupt grouped df (#4306)",{ 452 df <- tibble(x = 1:2, y = 1:2) %>% 453 structure(class = c("grouped_df", "tbl_df", "tbl", "data.frame")) 454 455 expect_true(group_by_drop_default(df)) 456}) 457 458test_that("group_by() puts NA groups last in STRSXP (#4227)", { 459 res <- tibble(x = c("apple", NA, "banana"), y = 1:3) %>% 460 group_by(x) %>% 461 group_data() 462 expect_identical(res$x, c("apple", "banana", NA_character_)) 463 expect_identical(res$.rows, list_of(1L, 3L, 2L)) 464}) 465 466test_that("group_by() does not create arbitrary NA groups for factors when drop = TRUE (#4460)", { 467 res <- expect_warning(group_data(group_by(iris, Species)[0, ]), NA) 468 expect_equal(nrow(res), 0L) 469 470 res <- expect_warning(group_data(group_by(iris[0, ], Species)), NA) 471 expect_equal(nrow(res), 0L) 472}) 473 474test_that("group_by() can handle auto splicing in the mutate() step", { 475 expect_identical( 476 iris %>% group_by(Species), 477 iris %>% group_by(data.frame(Species = Species)) 478 ) 479 480 expect_identical( 481 iris %>% group_by(Species), 482 iris %>% group_by(across(Species)) 483 ) 484 485 expect_identical( 486 iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width), 487 iris %>% group_by(across(starts_with("Sepal"), round)) 488 ) 489 490 df <- tibble(x = c(1, 2), y = c(1, 2)) 491 expect_identical(df %>% group_by(across(character())), df) 492 expect_identical(df %>% group_by(across(NULL)), df) 493 494 expect_identical(df %>% group_by(x) %>% group_by(across(character())), df) 495 expect_identical(df %>% group_by(x) %>% group_by(across(NULL)), df) 496}) 497 498test_that("group_by() can combine usual spec and auto-splicing-mutate() step", { 499 expect_identical( 500 iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width, Species), 501 iris %>% group_by(across(starts_with("Sepal"), round), Species) 502 ) 503 504 expect_identical( 505 iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Species, Sepal.Length, Sepal.Width), 506 iris %>% group_by(Species, across(starts_with("Sepal"), round)) 507 ) 508}) 509 510# mutate() semantics 511 512test_that("group_by() has mutate() semantics (#4984)", { 513 expect_equal( 514 tibble(a = 1, b = 2) %>% group_by(c = a * b, d = c + 1), 515 tibble(a = 1, b = 2) %>% mutate(c = a * b, d = c + 1) %>% group_by(c, d) 516 ) 517}) 518 519test_that("implicit mutate() operates on ungrouped data (#5598)", { 520 vars <- tibble(x = c(1,2), y = c(3,4), z = c(5,6)) %>% 521 dplyr::group_by(y) %>% 522 dplyr::group_by(across(any_of(c('y','z')))) %>% 523 dplyr::group_vars() 524 expect_equal(vars, c("y", "z")) 525}) 526 527test_that("grouped_df() does not break row.names (#5745)", { 528 groups <- compute_groups(data.frame(x = 1:10), "x") 529 expect_equal(.row_names_info(groups, type = 0), c(NA, -10L)) 530}) 531 532test_that("group_by() keeps attributes unrelated to the grouping (#5760)", { 533 d <- data.frame(x = 453, y = 642) 534 attr(d, "foo") <- "bar" 535 536 d2 <- group_by(d, x) 537 expect_equal(attr(d2, "foo"), "bar") 538 539 d3 <- group_by(d2, y, .add = TRUE) 540 expect_equal(attr(d2, "foo"), "bar") 541 542 d4 <- group_by(d2, y2 = y * 2, .add = TRUE) 543 expect_equal(attr(d2, "foo"), "bar") 544}) 545 546test_that("group_by() works with quosures (tidyverse/lubridate#959)", { 547 ignore <- function(...) NA 548 f <- function(var) { 549 tibble(x = 1) %>% group_by(g = ignore({{ var }})) 550 } 551 g <- function(var) { 552 # This used to fail with the extra argument 553 tibble(x = 1) %>% group_by(g = ignore({{ var }}, 1)) 554 } 555 expect_equal(f(), tibble(x = 1, g = NA) %>% group_by(g)) 556 expect_equal(g(), tibble(x = 1, g = NA) %>% group_by(g)) 557}) 558 559test_that("group_by() propagates caller env", { 560 expect_caller_env(group_by(mtcars, sig_caller_env())) 561}) 562 563 564# Errors ------------------------------------------------------------------ 565 566test_that("group_by() and ungroup() give meaningful error messages", { 567 df <- tibble(x = 1, y = 2) 568 569 expect_snapshot(error = TRUE, df %>% group_by(unknown)) 570 expect_snapshot(error = TRUE, df %>% ungroup(x)) 571 expect_snapshot(error = TRUE, df %>% group_by(x, y) %>% ungroup(z)) 572 573 expect_snapshot(error = TRUE, df %>% group_by(z = a + 1)) 574}) 575