1
2# counting ----------------------------------------------------------------
3
4test_that("vec_count counts number observations", {
5  x <- vec_count(rep(1:3, 1:3), sort = "key")
6  expect_equal(x, data.frame(key = 1:3, count = 1:3))
7})
8
9test_that("vec_count works with matrices", {
10  x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2))
11
12  out <- vec_count(x)
13  exp <- data_frame(key = c(NA, NA), count = int(2L, 1L))
14  exp$key <- vec_slice(x, c(1, 3))
15
16  expect_identical(out, exp)
17})
18
19test_that("vec_count works with arrays", {
20  x <- array(c(rep(1, 3), rep(2, 3)), dim = c(3, 2, 1))
21  expect <- data.frame(key = NA, count = 3)
22  expect$key <- vec_slice(x, 1L)
23  expect_equal(vec_count(x), expect)
24})
25
26test_that("vec_count works for zero-length input", {
27  x <- vec_count(integer(), sort = "none")
28  expect_equal(x, data.frame(key = integer(), count = integer()))
29})
30
31test_that("vec_count works with different encodings", {
32  x <- vec_count(encodings())
33  expect_equal(x, new_data_frame(list(key = encodings()[1], count = 3L)))
34})
35
36test_that("vec_count recursively takes the equality proxy", {
37  local_comparable_tuple()
38
39  x <- tuple(c(1, 1, 2), 1:3)
40  df <- data_frame(x = x)
41  expect <- data_frame(key = vec_slice(df, c(1, 3)), count = c(2L, 1L))
42
43  expect_equal(vec_count(df), expect)
44})
45
46# duplicates and uniques --------------------------------------------------
47
48test_that("vec_duplicated reports on duplicates regardless of position", {
49  x <- c(1, 1, 2, 3, 4, 4)
50  expect_equal(vec_duplicate_detect(x), c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE))
51})
52
53test_that("vec_duplicate_any returns single TRUE/FALSE", {
54  expect_false(vec_duplicate_any(c(1:10)))
55  expect_true(vec_duplicate_any(c(1:10, 1)))
56})
57
58test_that("vec_duplicate_id gives position of first found", {
59  x <- c(1, 2, 3, 1, 4)
60  expect_equal(vec_duplicate_id(x), c(1, 2, 3, 1, 5))
61})
62
63test_that("vec_unique matches unique", {
64  x <- sample(100, 1000, replace = TRUE)
65  expect_equal(vec_unique(x), unique(x))
66  expect_equal(vec_unique(c("x", "x")), "x")
67})
68
69test_that("vec_unique matches unique for matrices", {
70  x <- matrix(c(1, 1, 2, 2), ncol = 2)
71  expect_equal(vec_unique(x), unique(x))
72})
73
74test_that("vec_unique_count matches length + unique", {
75  x <- sample(100, 1000, replace = TRUE)
76  expect_equal(vec_unique_count(x), length(unique(x)))
77})
78
79test_that("also works for data frames", {
80  df <- data.frame(x = 1:3, y = letters[3:1], stringsAsFactors = FALSE)
81  idx <- c(1L, 1L, 1L, 2L, 2L, 3L)
82  df2 <- df[idx, , drop = FALSE]
83  rownames(df2) <- NULL
84
85  expect_equal(vec_duplicate_detect(df2), vec_duplicate_detect(idx))
86  expect_equal(vec_unique(df2), vec_slice(df, vec_unique(idx)))
87
88  count <- vec_count(df2, sort = "key")
89  expect_equal(count$key, df)
90  expect_equal(count$count, vec_count(idx)$count)
91
92  exp <- tibble(x = c(1, 1, 2), y = c(1, 2, 3))
93  expect_identical(vec_unique(vec_slice(exp, c(1, 1, 2, 3))), exp)
94})
95
96test_that("vec_unique() handles matrices (#327)", {
97  x <- matrix(c(1, 2, 3, 4), c(2, 2))
98  y <- matrix(c(1, 2, 3, 5), c(2, 2))
99  expect_identical(vec_unique(list(x, x)), list(x))
100  expect_identical(vec_unique(list(x, y)), list(x, y))
101
102  x <- matrix(c(1, 2, 1, 1, 2, 1), nrow = 3)
103  expect_identical(vec_unique(x), vec_slice(x, 1:2))
104})
105
106test_that("vec_unique() works with 1D arrays", {
107  # 1D arrays are dispatched to `as.data.frame.vector()` which
108  # currently does not strip dimensions. This caused an infinite
109  # recursion.
110  expect_identical(vec_unique(array(1:2)), array(1:2))
111
112  x <- new_vctr(c(1, 1, 1, 2, 1, 2), dim = c(3, 2))
113  expect_identical(vec_unique(x), new_vctr(c(1, 1, 2, 1), dim = c(2, 2)))
114})
115
116test_that("unique functions take the equality proxy (#375)", {
117  local_comparable_tuple()
118  x <- tuple(c(1, 2, 1), 1:3)
119
120  expect_true(vec_in(tuple(2, 100), x))
121  expect_identical(vec_match(tuple(2, 100), x), 2L)
122})
123
124test_that("unique functions take the equality proxy recursively", {
125  local_comparable_tuple()
126
127  x <- tuple(c(1, 1, 2), 1:3)
128  df <- data_frame(x = x)
129
130  expect_equal(vec_unique(df), vec_slice(df, c(1, 3)))
131  expect_equal(vec_unique_count(df), 2L)
132  expect_equal(vec_unique_loc(df), c(1, 3))
133})
134
135test_that("duplicate functions take the equality proxy recursively", {
136  local_comparable_tuple()
137
138  x <- tuple(c(1, 1, 2), 1:3)
139  df <- data_frame(x = x)
140
141  expect_equal(vec_duplicate_any(df), TRUE)
142  expect_equal(vec_duplicate_detect(df), c(TRUE, TRUE, FALSE))
143  expect_equal(vec_duplicate_id(df), c(1, 1, 3))
144})
145
146test_that("unique functions treat positive and negative 0 as equivalent (#637)", {
147  expect_equal(vec_unique(c(0, -0)), 0)
148  expect_equal(vec_unique_count(c(0, -0)), 1)
149  expect_equal(vec_unique_loc(c(0, -0)), 1)
150})
151
152test_that("unique functions work with different encodings", {
153  encs <- encodings()
154
155  expect_equal(vec_unique(encs), encs[1])
156  expect_equal(vec_unique_count(encs), 1L)
157  expect_equal(vec_unique_loc(encs), 1L)
158})
159
160test_that("unique functions can handle scalar types in lists", {
161  x <- list(x = a ~ b, y = a ~ b, z = a ~ c)
162  expect_equal(vec_unique(x), vec_slice(x, c(1, 3)))
163
164  x <- list(x = call("x"), y = call("y"), z = call("x"))
165  expect_equal(vec_unique(x), vec_slice(x, c(1, 2)))
166})
167
168test_that("duplicate functions works with different encodings", {
169  encs <- encodings()
170
171  expect_equal(vec_duplicate_id(encs), rep(1, 3))
172  expect_equal(vec_duplicate_detect(encs), rep(TRUE, 3))
173  expect_equal(vec_duplicate_any(encs), TRUE)
174})
175
176test_that("vec_unique() returns differently encoded strings in the order they appear", {
177  encs <- encodings()
178  x <- c(encs$unknown, encs$utf8)
179  y <- c(encs$utf8, encs$unknown)
180
181  expect_equal_encoding(vec_unique(x), encs$unknown)
182  expect_equal_encoding(vec_unique(y), encs$utf8)
183})
184
185test_that("vec_unique() works on lists containing expressions", {
186  x <- list(expression(x), expression(y), expression(x))
187  expect_equal(vec_unique(x), x[1:2])
188})
189
190test_that("vec_unique() works with glm objects (#643)", {
191  # class(model$family$initialize) == "expression"
192  model <- glm(mpg ~ wt, data = mtcars)
193  expect_equal(vec_unique(list(model, model)), list(model))
194})
195
196test_that("can take the unique locations of dfs with list-cols", {
197  df <- tibble(x = list(1, 2, 1, 3), y = list(1, 2, 1, 3))
198  expect_identical(vec_unique_loc(df), c(1L, 2L, 4L))
199})
200
201
202# matching ----------------------------------------------------------------
203
204test_that("vec_match() matches match()", {
205  n <- c(1:3, NA)
206  h <- c(4, 2, 1, NA)
207  expect_equal(vec_match(n, h), match(n, h))
208
209  expect_equal(vec_match(1.5, c(2, 1.5, NA)), match(1.5, c(2, 1.5, NA)))
210  expect_equal(vec_match("x", "x"), match("x", "x"))
211})
212
213test_that("vec_match() and vec_in() check types", {
214  verify_errors({
215    df1 <- data_frame(x = data_frame(foo = 1))
216    df2 <- data_frame(x = data_frame(foo = ""))
217    expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type")
218    expect_error(vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type")
219    expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type")
220    expect_error(vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type")
221  })
222})
223
224test_that("vec_in() matches %in%", {
225  n <- c(1:3, NA)
226  h <- c(4, 2, 1, NA)
227
228  expect_equal(vec_in(n, h), n %in% h)
229})
230
231test_that("can opt out of NA matching", {
232  n <- c(1, NA)
233  h <- c(1:3, NA)
234
235  expect_equal(vec_in(n, h, na_equal = FALSE), c(TRUE, NA))
236})
237
238test_that("vec_match works with empty data frame", {
239  out <- vec_match(
240    new_data_frame(n = 3L),
241    new_data_frame(n = 0L)
242  )
243  expect_equal(out, vec_init(integer(), 3))
244})
245
246test_that("matching functions take the equality proxy (#375)", {
247  local_comparable_tuple()
248  x <- tuple(c(1, 2, 1), 1:3)
249
250  expect_identical(vec_unique_loc(x), 1:2)
251  expect_identical(unique(x), tuple(c(1, 2), 1:2))
252
253  expect_true(vec_duplicate_any(x))
254  expect_identical(vec_duplicate_id(x), c(1L, 2L, 1L))
255  expect_identical(vec_unique_count(x), 2L)
256
257  expect_identical(vec_duplicate_detect(x), c(TRUE, FALSE, TRUE))
258})
259
260test_that("can take the unique loc of 1d arrays (#461)", {
261  x <- array(c(1, 1, 2, 2, 3))
262  y <- array(c(1, 1, 2, 2, 3), dimnames = list(NULL))
263  expect_identical(vctrs::vec_unique_loc(x), int(1, 3, 5))
264  expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5))
265
266  z <- array(c(1, 1, 2, 2, 3, 4), c(3, 2))
267  expect_silent(expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5)))
268})
269
270test_that("matching functions work with different encodings", {
271  encs <- encodings()
272
273  expect_equal(vec_match(encs, encs[1]), rep(1, 3))
274  expect_equal(vec_in(encs, encs[1]), rep(TRUE, 3))
275})
276
277test_that("matching functions take the equality proxy recursively", {
278  local_comparable_tuple()
279
280  x <- tuple(c(1, 2), 1:2)
281  df <- data_frame(x = x)
282
283  y <- tuple(c(2, 3), c(3, 3))
284  df2 <- data_frame(x = y)
285
286  expect_equal(vec_match(df, df2), c(NA, 1))
287  expect_equal(vec_in(df, df2), c(FALSE, TRUE))
288})
289
290test_that("can propagate missing values while matching", {
291  exp <- c(NA, 3L, NA, 1L)
292  expect_identical(vec_match(lgl(NA, TRUE, NA, FALSE), lgl(FALSE, NA, TRUE), na_equal = FALSE), exp)
293  expect_identical(vec_match(int(NA, 1L, NA, 2L), int(2L, NA, 1L), na_equal = FALSE), exp)
294  expect_identical(vec_match(dbl(NA, 1, NA, 2), dbl(2, NA, 1), na_equal = FALSE), exp)
295  expect_identical(vec_match(cpl(NA, 1, NA, 2), cpl(2, NA, 1), na_equal = FALSE), exp)
296  expect_identical(vec_match(chr(NA, "1", NA, "2"), chr("2", NA, "1"), na_equal = FALSE), exp)
297  expect_identical(vec_match(list(NULL, 1, NULL, 2), list(2, NULL, 1), na_equal = FALSE), exp)
298
299  # No missing values for raw vectors
300  expect_identical(vec_match(bytes(0, 1, 0, 2), bytes(2, 0, 1), na_equal = FALSE), c(2L, 3L, 2L, 1L))
301})
302
303test_that("can propagate NaN as a missing value (#1252)", {
304  expect_identical(vec_match(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), int(NA, NA))
305  expect_identical(vec_in(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), lgl(NA, NA))
306})
307
308test_that("missing values are propagated across columns", {
309  for (na_value in list(NA, na_int, na_dbl, na_cpl, na_chr, list(NULL))) {
310    df <- data_frame(x = 1, y = data_frame(foo = 2, bar = na_value), z = 3)
311    expect_identical(vec_match(df, df), 1L)
312    expect_identical(vec_match(df, df, na_equal = FALSE), na_int)
313  }
314})
315
316test_that("can't supply NA as `na_equal`", {
317  expect_error(vec_match(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`")
318})
319
320test_that("dictionary tools have informative errors", {
321  verify_output(test_path("error", "test-dictionary.txt"), {
322    "# vec_match() and vec_in() check types"
323    df1 <- data_frame(x = data_frame(foo = 1))
324    df2 <- data_frame(x = data_frame(foo = ""))
325    vec_match(df1, df2)
326    vec_match(df1, df2, needles_arg = "n", haystack_arg = "h")
327    vec_in(df1, df2)
328    vec_in(df1, df2, needles_arg = "n", haystack_arg = "h")
329  })
330})
331
332test_that("vec_match() and vec_in() silently fall back to base data frame", {
333  expect_silent(expect_identical(
334    vec_match(foobar(mtcars), foobar(tibble::as_tibble(mtcars))),
335    1:32
336  ))
337  expect_silent(expect_identical(
338    vec_in(foobar(mtcars), foobar(tibble::as_tibble(mtcars))),
339    rep(TRUE, 32)
340  ))
341})
342