1 2# counting ---------------------------------------------------------------- 3 4test_that("vec_count counts number observations", { 5 x <- vec_count(rep(1:3, 1:3), sort = "key") 6 expect_equal(x, data.frame(key = 1:3, count = 1:3)) 7}) 8 9test_that("vec_count works with matrices", { 10 x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2)) 11 12 out <- vec_count(x) 13 exp <- data_frame(key = c(NA, NA), count = int(2L, 1L)) 14 exp$key <- vec_slice(x, c(1, 3)) 15 16 expect_identical(out, exp) 17}) 18 19test_that("vec_count works with arrays", { 20 x <- array(c(rep(1, 3), rep(2, 3)), dim = c(3, 2, 1)) 21 expect <- data.frame(key = NA, count = 3) 22 expect$key <- vec_slice(x, 1L) 23 expect_equal(vec_count(x), expect) 24}) 25 26test_that("vec_count works for zero-length input", { 27 x <- vec_count(integer(), sort = "none") 28 expect_equal(x, data.frame(key = integer(), count = integer())) 29}) 30 31test_that("vec_count works with different encodings", { 32 x <- vec_count(encodings()) 33 expect_equal(x, new_data_frame(list(key = encodings()[1], count = 3L))) 34}) 35 36test_that("vec_count recursively takes the equality proxy", { 37 local_comparable_tuple() 38 39 x <- tuple(c(1, 1, 2), 1:3) 40 df <- data_frame(x = x) 41 expect <- data_frame(key = vec_slice(df, c(1, 3)), count = c(2L, 1L)) 42 43 expect_equal(vec_count(df), expect) 44}) 45 46# duplicates and uniques -------------------------------------------------- 47 48test_that("vec_duplicated reports on duplicates regardless of position", { 49 x <- c(1, 1, 2, 3, 4, 4) 50 expect_equal(vec_duplicate_detect(x), c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE)) 51}) 52 53test_that("vec_duplicate_any returns single TRUE/FALSE", { 54 expect_false(vec_duplicate_any(c(1:10))) 55 expect_true(vec_duplicate_any(c(1:10, 1))) 56}) 57 58test_that("vec_duplicate_id gives position of first found", { 59 x <- c(1, 2, 3, 1, 4) 60 expect_equal(vec_duplicate_id(x), c(1, 2, 3, 1, 5)) 61}) 62 63test_that("vec_unique matches unique", { 64 x <- sample(100, 1000, replace = TRUE) 65 expect_equal(vec_unique(x), unique(x)) 66 expect_equal(vec_unique(c("x", "x")), "x") 67}) 68 69test_that("vec_unique matches unique for matrices", { 70 x <- matrix(c(1, 1, 2, 2), ncol = 2) 71 expect_equal(vec_unique(x), unique(x)) 72}) 73 74test_that("vec_unique_count matches length + unique", { 75 x <- sample(100, 1000, replace = TRUE) 76 expect_equal(vec_unique_count(x), length(unique(x))) 77}) 78 79test_that("also works for data frames", { 80 df <- data.frame(x = 1:3, y = letters[3:1], stringsAsFactors = FALSE) 81 idx <- c(1L, 1L, 1L, 2L, 2L, 3L) 82 df2 <- df[idx, , drop = FALSE] 83 rownames(df2) <- NULL 84 85 expect_equal(vec_duplicate_detect(df2), vec_duplicate_detect(idx)) 86 expect_equal(vec_unique(df2), vec_slice(df, vec_unique(idx))) 87 88 count <- vec_count(df2, sort = "key") 89 expect_equal(count$key, df) 90 expect_equal(count$count, vec_count(idx)$count) 91 92 exp <- tibble(x = c(1, 1, 2), y = c(1, 2, 3)) 93 expect_identical(vec_unique(vec_slice(exp, c(1, 1, 2, 3))), exp) 94}) 95 96test_that("vec_unique() handles matrices (#327)", { 97 x <- matrix(c(1, 2, 3, 4), c(2, 2)) 98 y <- matrix(c(1, 2, 3, 5), c(2, 2)) 99 expect_identical(vec_unique(list(x, x)), list(x)) 100 expect_identical(vec_unique(list(x, y)), list(x, y)) 101 102 x <- matrix(c(1, 2, 1, 1, 2, 1), nrow = 3) 103 expect_identical(vec_unique(x), vec_slice(x, 1:2)) 104}) 105 106test_that("vec_unique() works with 1D arrays", { 107 # 1D arrays are dispatched to `as.data.frame.vector()` which 108 # currently does not strip dimensions. This caused an infinite 109 # recursion. 110 expect_identical(vec_unique(array(1:2)), array(1:2)) 111 112 x <- new_vctr(c(1, 1, 1, 2, 1, 2), dim = c(3, 2)) 113 expect_identical(vec_unique(x), new_vctr(c(1, 1, 2, 1), dim = c(2, 2))) 114}) 115 116test_that("unique functions take the equality proxy (#375)", { 117 local_comparable_tuple() 118 x <- tuple(c(1, 2, 1), 1:3) 119 120 expect_true(vec_in(tuple(2, 100), x)) 121 expect_identical(vec_match(tuple(2, 100), x), 2L) 122}) 123 124test_that("unique functions take the equality proxy recursively", { 125 local_comparable_tuple() 126 127 x <- tuple(c(1, 1, 2), 1:3) 128 df <- data_frame(x = x) 129 130 expect_equal(vec_unique(df), vec_slice(df, c(1, 3))) 131 expect_equal(vec_unique_count(df), 2L) 132 expect_equal(vec_unique_loc(df), c(1, 3)) 133}) 134 135test_that("duplicate functions take the equality proxy recursively", { 136 local_comparable_tuple() 137 138 x <- tuple(c(1, 1, 2), 1:3) 139 df <- data_frame(x = x) 140 141 expect_equal(vec_duplicate_any(df), TRUE) 142 expect_equal(vec_duplicate_detect(df), c(TRUE, TRUE, FALSE)) 143 expect_equal(vec_duplicate_id(df), c(1, 1, 3)) 144}) 145 146test_that("unique functions treat positive and negative 0 as equivalent (#637)", { 147 expect_equal(vec_unique(c(0, -0)), 0) 148 expect_equal(vec_unique_count(c(0, -0)), 1) 149 expect_equal(vec_unique_loc(c(0, -0)), 1) 150}) 151 152test_that("unique functions work with different encodings", { 153 encs <- encodings() 154 155 expect_equal(vec_unique(encs), encs[1]) 156 expect_equal(vec_unique_count(encs), 1L) 157 expect_equal(vec_unique_loc(encs), 1L) 158}) 159 160test_that("unique functions can handle scalar types in lists", { 161 x <- list(x = a ~ b, y = a ~ b, z = a ~ c) 162 expect_equal(vec_unique(x), vec_slice(x, c(1, 3))) 163 164 x <- list(x = call("x"), y = call("y"), z = call("x")) 165 expect_equal(vec_unique(x), vec_slice(x, c(1, 2))) 166}) 167 168test_that("duplicate functions works with different encodings", { 169 encs <- encodings() 170 171 expect_equal(vec_duplicate_id(encs), rep(1, 3)) 172 expect_equal(vec_duplicate_detect(encs), rep(TRUE, 3)) 173 expect_equal(vec_duplicate_any(encs), TRUE) 174}) 175 176test_that("vec_unique() returns differently encoded strings in the order they appear", { 177 encs <- encodings() 178 x <- c(encs$unknown, encs$utf8) 179 y <- c(encs$utf8, encs$unknown) 180 181 expect_equal_encoding(vec_unique(x), encs$unknown) 182 expect_equal_encoding(vec_unique(y), encs$utf8) 183}) 184 185test_that("vec_unique() works on lists containing expressions", { 186 x <- list(expression(x), expression(y), expression(x)) 187 expect_equal(vec_unique(x), x[1:2]) 188}) 189 190test_that("vec_unique() works with glm objects (#643)", { 191 # class(model$family$initialize) == "expression" 192 model <- glm(mpg ~ wt, data = mtcars) 193 expect_equal(vec_unique(list(model, model)), list(model)) 194}) 195 196test_that("can take the unique locations of dfs with list-cols", { 197 df <- tibble(x = list(1, 2, 1, 3), y = list(1, 2, 1, 3)) 198 expect_identical(vec_unique_loc(df), c(1L, 2L, 4L)) 199}) 200 201 202# matching ---------------------------------------------------------------- 203 204test_that("vec_match() matches match()", { 205 n <- c(1:3, NA) 206 h <- c(4, 2, 1, NA) 207 expect_equal(vec_match(n, h), match(n, h)) 208 209 expect_equal(vec_match(1.5, c(2, 1.5, NA)), match(1.5, c(2, 1.5, NA))) 210 expect_equal(vec_match("x", "x"), match("x", "x")) 211}) 212 213test_that("vec_match() and vec_in() check types", { 214 verify_errors({ 215 df1 <- data_frame(x = data_frame(foo = 1)) 216 df2 <- data_frame(x = data_frame(foo = "")) 217 expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type") 218 expect_error(vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type") 219 expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type") 220 expect_error(vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type") 221 }) 222}) 223 224test_that("vec_in() matches %in%", { 225 n <- c(1:3, NA) 226 h <- c(4, 2, 1, NA) 227 228 expect_equal(vec_in(n, h), n %in% h) 229}) 230 231test_that("can opt out of NA matching", { 232 n <- c(1, NA) 233 h <- c(1:3, NA) 234 235 expect_equal(vec_in(n, h, na_equal = FALSE), c(TRUE, NA)) 236}) 237 238test_that("vec_match works with empty data frame", { 239 out <- vec_match( 240 new_data_frame(n = 3L), 241 new_data_frame(n = 0L) 242 ) 243 expect_equal(out, vec_init(integer(), 3)) 244}) 245 246test_that("matching functions take the equality proxy (#375)", { 247 local_comparable_tuple() 248 x <- tuple(c(1, 2, 1), 1:3) 249 250 expect_identical(vec_unique_loc(x), 1:2) 251 expect_identical(unique(x), tuple(c(1, 2), 1:2)) 252 253 expect_true(vec_duplicate_any(x)) 254 expect_identical(vec_duplicate_id(x), c(1L, 2L, 1L)) 255 expect_identical(vec_unique_count(x), 2L) 256 257 expect_identical(vec_duplicate_detect(x), c(TRUE, FALSE, TRUE)) 258}) 259 260test_that("can take the unique loc of 1d arrays (#461)", { 261 x <- array(c(1, 1, 2, 2, 3)) 262 y <- array(c(1, 1, 2, 2, 3), dimnames = list(NULL)) 263 expect_identical(vctrs::vec_unique_loc(x), int(1, 3, 5)) 264 expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5)) 265 266 z <- array(c(1, 1, 2, 2, 3, 4), c(3, 2)) 267 expect_silent(expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5))) 268}) 269 270test_that("matching functions work with different encodings", { 271 encs <- encodings() 272 273 expect_equal(vec_match(encs, encs[1]), rep(1, 3)) 274 expect_equal(vec_in(encs, encs[1]), rep(TRUE, 3)) 275}) 276 277test_that("matching functions take the equality proxy recursively", { 278 local_comparable_tuple() 279 280 x <- tuple(c(1, 2), 1:2) 281 df <- data_frame(x = x) 282 283 y <- tuple(c(2, 3), c(3, 3)) 284 df2 <- data_frame(x = y) 285 286 expect_equal(vec_match(df, df2), c(NA, 1)) 287 expect_equal(vec_in(df, df2), c(FALSE, TRUE)) 288}) 289 290test_that("can propagate missing values while matching", { 291 exp <- c(NA, 3L, NA, 1L) 292 expect_identical(vec_match(lgl(NA, TRUE, NA, FALSE), lgl(FALSE, NA, TRUE), na_equal = FALSE), exp) 293 expect_identical(vec_match(int(NA, 1L, NA, 2L), int(2L, NA, 1L), na_equal = FALSE), exp) 294 expect_identical(vec_match(dbl(NA, 1, NA, 2), dbl(2, NA, 1), na_equal = FALSE), exp) 295 expect_identical(vec_match(cpl(NA, 1, NA, 2), cpl(2, NA, 1), na_equal = FALSE), exp) 296 expect_identical(vec_match(chr(NA, "1", NA, "2"), chr("2", NA, "1"), na_equal = FALSE), exp) 297 expect_identical(vec_match(list(NULL, 1, NULL, 2), list(2, NULL, 1), na_equal = FALSE), exp) 298 299 # No missing values for raw vectors 300 expect_identical(vec_match(bytes(0, 1, 0, 2), bytes(2, 0, 1), na_equal = FALSE), c(2L, 3L, 2L, 1L)) 301}) 302 303test_that("can propagate NaN as a missing value (#1252)", { 304 expect_identical(vec_match(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), int(NA, NA)) 305 expect_identical(vec_in(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), lgl(NA, NA)) 306}) 307 308test_that("missing values are propagated across columns", { 309 for (na_value in list(NA, na_int, na_dbl, na_cpl, na_chr, list(NULL))) { 310 df <- data_frame(x = 1, y = data_frame(foo = 2, bar = na_value), z = 3) 311 expect_identical(vec_match(df, df), 1L) 312 expect_identical(vec_match(df, df, na_equal = FALSE), na_int) 313 } 314}) 315 316test_that("can't supply NA as `na_equal`", { 317 expect_error(vec_match(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`") 318}) 319 320test_that("dictionary tools have informative errors", { 321 verify_output(test_path("error", "test-dictionary.txt"), { 322 "# vec_match() and vec_in() check types" 323 df1 <- data_frame(x = data_frame(foo = 1)) 324 df2 <- data_frame(x = data_frame(foo = "")) 325 vec_match(df1, df2) 326 vec_match(df1, df2, needles_arg = "n", haystack_arg = "h") 327 vec_in(df1, df2) 328 vec_in(df1, df2, needles_arg = "n", haystack_arg = "h") 329 }) 330}) 331 332test_that("vec_match() and vec_in() silently fall back to base data frame", { 333 expect_silent(expect_identical( 334 vec_match(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), 335 1:32 336 )) 337 expect_silent(expect_identical( 338 vec_in(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), 339 rep(TRUE, 32) 340 )) 341}) 342