1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18test_that("list_compute_functions", {
19  allfuncs <- list_compute_functions()
20  expect_false(all(grepl("min", allfuncs)))
21  justmins <- list_compute_functions("^min")
22  expect_true(length(justmins) > 0)
23  expect_true(all(grepl("min", justmins)))
24  no_hash_funcs <- list_compute_functions("^hash")
25  expect_true(length(no_hash_funcs) == 0)
26})
27
28test_that("sum.Array", {
29  ints <- 1:5
30  a <- Array$create(ints)
31  expect_r6_class(sum(a), "Scalar")
32  expect_identical(as.integer(sum(a)), sum(ints))
33
34  floats <- c(1.3, 2.4, 3)
35  f <- Array$create(floats)
36  expect_identical(as.numeric(sum(f)), sum(floats))
37
38  floats <- c(floats, NA)
39  na <- Array$create(floats)
40  if (!grepl("devel", R.version.string)) {
41    # Valgrind on R-devel confuses NaN and NA_real_
42    # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html
43    expect_identical(as.numeric(sum(na)), sum(floats))
44  }
45  expect_r6_class(sum(na, na.rm = TRUE), "Scalar")
46  expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE))
47
48  bools <- c(TRUE, NA, TRUE, FALSE)
49  b <- Array$create(bools)
50  expect_identical(as.integer(sum(b)), sum(bools))
51  expect_identical(as.integer(sum(b, na.rm = TRUE)), sum(bools, na.rm = TRUE))
52})
53
54test_that("sum.ChunkedArray", {
55  a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
56  expect_r6_class(sum(a), "Scalar")
57  expect_true(is.na(as.vector(sum(a))))
58  expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35)
59})
60
61test_that("sum dots", {
62  a1 <- Array$create(1:4)
63  a2 <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
64  expect_identical(as.numeric(sum(a1, a2, na.rm = TRUE)), 45)
65})
66
67test_that("sum.Scalar", {
68  s <- Scalar$create(4)
69  expect_identical(as.numeric(s), as.numeric(sum(s)))
70})
71
72test_that("mean.Array", {
73  ints <- 1:4
74  a <- Array$create(ints)
75  expect_r6_class(mean(a), "Scalar")
76  expect_identical(as.vector(mean(a)), mean(ints))
77
78  floats <- c(1.3, 2.4, 3)
79  f <- Array$create(floats)
80  expect_identical(as.vector(mean(f)), mean(floats))
81
82  floats <- c(floats, NA)
83  na <- Array$create(floats)
84  if (!grepl("devel", R.version.string)) {
85    # Valgrind on R-devel confuses NaN and NA_real_
86    # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html
87    expect_identical(as.vector(mean(na)), mean(floats))
88  }
89  expect_r6_class(mean(na, na.rm = TRUE), "Scalar")
90  expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE))
91
92  bools <- c(TRUE, NA, TRUE, FALSE)
93  b <- Array$create(bools)
94  expect_identical(as.vector(mean(b)), mean(bools))
95  expect_identical(as.integer(sum(b, na.rm = TRUE)), sum(bools, na.rm = TRUE))
96})
97
98test_that("mean.ChunkedArray", {
99  a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
100  expect_r6_class(mean(a), "Scalar")
101  expect_true(is.na(as.vector(mean(a))))
102  expect_identical(as.vector(mean(a, na.rm = TRUE)), 35 / 13)
103})
104
105test_that("mean.Scalar", {
106  s <- Scalar$create(4)
107  expect_equal(s, mean(s))
108})
109
110test_that("Bad input handling of call_function", {
111  expect_error(
112    call_function("sum", 2, 3),
113    'Argument 1 is of class numeric but it must be one of "Array", "ChunkedArray", "RecordBatch", "Table", or "Scalar"'
114  )
115})
116
117test_that("min.Array", {
118  ints <- 1:4
119  a <- Array$create(ints)
120  expect_r6_class(min(a), "Scalar")
121  expect_identical(as.vector(min(a)), min(ints))
122
123  floats <- c(1.3, 3, 2.4)
124  f <- Array$create(floats)
125  expect_identical(as.vector(min(f)), min(floats))
126
127  floats <- c(floats, NA)
128  na <- Array$create(floats)
129  expect_identical(as.vector(min(na)), min(floats))
130  expect_r6_class(min(na, na.rm = TRUE), "Scalar")
131  expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE))
132
133  bools <- c(TRUE, TRUE, FALSE)
134  b <- Array$create(bools)
135  # R is inconsistent here: typeof(min(NA)) == "integer", not "logical"
136  expect_identical(as.vector(min(b)), as.logical(min(bools)))
137})
138
139test_that("max.Array", {
140  ints <- 1:4
141  a <- Array$create(ints)
142  expect_r6_class(max(a), "Scalar")
143  expect_identical(as.vector(max(a)), max(ints))
144
145  floats <- c(1.3, 3, 2.4)
146  f <- Array$create(floats)
147  expect_identical(as.vector(max(f)), max(floats))
148
149  floats <- c(floats, NA)
150  na <- Array$create(floats)
151  expect_identical(as.vector(max(na)), max(floats))
152  expect_r6_class(max(na, na.rm = TRUE), "Scalar")
153  expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE))
154
155  bools <- c(TRUE, TRUE, FALSE)
156  b <- Array$create(bools)
157  # R is inconsistent here: typeof(max(NA)) == "integer", not "logical"
158  expect_identical(as.vector(max(b)), as.logical(max(bools)))
159})
160
161test_that("min.ChunkedArray", {
162  ints <- 1:4
163  a <- ChunkedArray$create(ints)
164  expect_r6_class(min(a), "Scalar")
165  expect_identical(as.vector(min(a)), min(ints))
166
167  floats <- c(1.3, 3, 2.4)
168  f <- ChunkedArray$create(floats)
169  expect_identical(as.vector(min(f)), min(floats))
170
171  floats <- c(floats, NA)
172  na <- ChunkedArray$create(floats)
173  expect_identical(as.vector(min(na)), min(floats))
174  expect_r6_class(min(na, na.rm = TRUE), "Scalar")
175  expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE))
176
177  bools <- c(TRUE, TRUE, FALSE)
178  b <- ChunkedArray$create(bools)
179  # R is inconsistent here: typeof(min(NA)) == "integer", not "logical"
180  expect_identical(as.vector(min(b)), as.logical(min(bools)))
181})
182
183test_that("max.ChunkedArray", {
184  ints <- 1:4
185  a <- ChunkedArray$create(ints)
186  expect_r6_class(max(a), "Scalar")
187  expect_identical(as.vector(max(a)), max(ints))
188
189  floats <- c(1.3, 3, 2.4)
190  f <- ChunkedArray$create(floats)
191  expect_identical(as.vector(max(f)), max(floats))
192
193  floats <- c(floats, NA)
194  na <- ChunkedArray$create(floats)
195  expect_identical(as.vector(max(na)), max(floats))
196  expect_r6_class(max(na, na.rm = TRUE), "Scalar")
197  expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE))
198
199  bools <- c(TRUE, TRUE, FALSE)
200  b <- ChunkedArray$create(bools)
201  # R is inconsistent here: typeof(max(NA)) == "integer", not "logical"
202  expect_identical(as.vector(max(b)), as.logical(max(bools)))
203})
204
205test_that("Edge cases", {
206  a <- Array$create(NA)
207  for (type in c(int32(), float64(), bool())) {
208    expect_as_vector(sum(a$cast(type), na.rm = TRUE), sum(NA, na.rm = TRUE))
209    expect_as_vector(mean(a$cast(type), na.rm = TRUE), mean(NA, na.rm = TRUE))
210    expect_as_vector(
211      min(a$cast(type), na.rm = TRUE),
212      # Suppress the base R warning about no non-missing arguments
213      suppressWarnings(min(NA, na.rm = TRUE))
214    )
215    expect_as_vector(
216      max(a$cast(type), na.rm = TRUE),
217      suppressWarnings(max(NA, na.rm = TRUE))
218    )
219  }
220})
221
222test_that("quantile.Array and quantile.ChunkedArray", {
223  a <- Array$create(c(0, 1, 2, 3))
224  ca <- ChunkedArray$create(c(0, 1), c(2, 3))
225  probs <- c(0.49, 0.51)
226  for (ad in list(a, ca)) {
227    for (type in c(int32(), uint64(), float64())) {
228      expect_equal(
229        quantile(ad$cast(type), probs = probs, interpolation = "linear"),
230        Array$create(c(1.47, 1.53))
231      )
232      expect_equal(
233        quantile(ad$cast(type), probs = probs, interpolation = "lower"),
234        Array$create(c(1, 1))$cast(type)
235      )
236      expect_equal(
237        quantile(ad$cast(type), probs = probs, interpolation = "higher"),
238        Array$create(c(2, 2))$cast(type)
239      )
240      expect_equal(
241        quantile(ad$cast(type), probs = probs, interpolation = "nearest"),
242        Array$create(c(1, 2))$cast(type)
243      )
244      expect_equal(
245        quantile(ad$cast(type), probs = probs, interpolation = "midpoint"),
246        Array$create(c(1.5, 1.5))
247      )
248    }
249  }
250})
251
252test_that("quantile and median NAs, edge cases, and exceptions", {
253  expect_equal(
254    quantile(Array$create(c(1, 2)), probs = c(0, 1)),
255    Array$create(c(1, 2))
256  )
257  expect_error(
258    quantile(Array$create(c(1, 2, NA))),
259    "Missing values not allowed if 'na.rm' is FALSE"
260  )
261  expect_equal(
262    quantile(Array$create(numeric(0))),
263    Array$create(rep(NA_real_, 5))
264  )
265  expect_equal(
266    quantile(Array$create(rep(NA_integer_, 3)), na.rm = TRUE),
267    Array$create(rep(NA_real_, 5))
268  )
269  expect_equal(
270    quantile(Scalar$create(0L)),
271    Array$create(rep(0, 5))
272  )
273  expect_equal(
274    median(Scalar$create(1L)),
275    Scalar$create(1)
276  )
277  expect_error(
278    quantile(Array$create(1:3), type = 9),
279    "not supported"
280  )
281})
282
283test_that("median passes ... args to quantile", {
284  skip_if(
285    !"..." %in% names(formals(median)),
286    "The median generic lacks dots in R 3.3.0 and earlier"
287  )
288  expect_equal(
289    median(Array$create(c(1, 2)), interpolation = "higher"),
290    Scalar$create(2)
291  )
292  expect_error(
293    median(Array$create(c(1, 2)), probs = c(.25, .75))
294  )
295})
296
297test_that("median.Array and median.ChunkedArray", {
298  compare_expression(
299    median(.input),
300    1:4
301  )
302  compare_expression(
303    median(.input),
304    1:5
305  )
306  compare_expression(
307    median(.input),
308    numeric(0)
309  )
310  compare_expression(
311    median(.input, na.rm = FALSE),
312    c(1, 2, NA)
313  )
314  compare_expression(
315    median(.input, na.rm = TRUE),
316    c(1, 2, NA)
317  )
318  compare_expression(
319    median(.input, na.rm = TRUE),
320    NA_real_
321  )
322  compare_expression(
323    median(.input, na.rm = FALSE),
324    c(1, 2, NA)
325  )
326  compare_expression(
327    median(.input, na.rm = TRUE),
328    c(1, 2, NA)
329  )
330  compare_expression(
331    median(.input, na.rm = TRUE),
332    NA_real_
333  )
334})
335
336test_that("unique.Array", {
337  a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
338  expect_equal(unique(a), Array$create(c(1, 4, 3)))
339  ca <- ChunkedArray$create(a, a)
340  expect_equal(unique(ca), Array$create(c(1, 4, 3)))
341})
342
343test_that("match_arrow", {
344  a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
345  tab <- c(4, 3, 2, 1)
346  expect_equal(match_arrow(a, tab), Array$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L)))
347
348  ca <- ChunkedArray$create(c(1, 4, 3, 1, 1, 3, 4))
349  expect_equal(match_arrow(ca, tab), ChunkedArray$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L)))
350
351  sc <- Scalar$create(3)
352  expect_equal(match_arrow(sc, tab), Scalar$create(1L))
353
354  vec <- c(1, 2)
355  expect_equal(match_arrow(vec, tab), Array$create(c(3L, 2L)))
356})
357
358test_that("is_in", {
359  a <- Array$create(c(9, 4, 3))
360  tab <- c(4, 3, 2, 1)
361  expect_equal(is_in(a, tab), Array$create(c(FALSE, TRUE, TRUE)))
362
363  ca <- ChunkedArray$create(c(9, 4, 3))
364  expect_equal(is_in(ca, tab), ChunkedArray$create(c(FALSE, TRUE, TRUE)))
365
366  sc <- Scalar$create(3)
367  expect_equal(is_in(sc, tab), Scalar$create(TRUE))
368
369  vec <- c(1, 9)
370  expect_equal(is_in(vec, tab), Array$create(c(TRUE, FALSE)))
371})
372
373test_that("value_counts", {
374  a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
375  result_df <- tibble::tibble(
376    values = c(1, 4, 3),
377    counts = c(3L, 2L, 2L)
378  )
379  result <- Array$create(
380    result_df,
381    type = struct(values = float64(), counts = int64())
382  )
383  expect_equal(value_counts(a), result)
384  expect_identical(as.data.frame(value_counts(a)), result_df)
385  expect_identical(as.vector(value_counts(a)$counts), result_df$counts)
386})
387
388test_that("any.Array and any.ChunkedArray", {
389  data <- c(1:10, NA, NA)
390
391  compare_expression(any(.input > 5), data)
392  compare_expression(any(.input > 5, na.rm = TRUE), data)
393  compare_expression(any(.input < 1), data)
394  compare_expression(any(.input < 1, na.rm = TRUE), data)
395
396  data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE)
397
398  compare_expression(any(.input), data_logical)
399  compare_expression(any(.input, na.rm = FALSE), data_logical)
400  compare_expression(any(.input, na.rm = TRUE), data_logical)
401})
402
403test_that("all.Array and all.ChunkedArray", {
404  data <- c(1:10, NA, NA)
405
406  compare_expression(all(.input > 5), data)
407  compare_expression(all(.input > 5, na.rm = TRUE), data)
408
409  compare_expression(all(.input < 11), data)
410  compare_expression(all(.input < 11, na.rm = TRUE), data)
411
412  data_logical <- c(TRUE, TRUE, NA)
413
414  compare_expression(all(.input), data_logical)
415  compare_expression(all(.input, na.rm = TRUE), data_logical)
416})
417
418test_that("variance", {
419  data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
420  arr <- Array$create(data)
421  chunked_arr <- ChunkedArray$create(data)
422
423  expect_equal(call_function("variance", arr, options = list(ddof = 5)), Scalar$create(34596))
424  expect_equal(call_function("variance", chunked_arr, options = list(ddof = 5)), Scalar$create(34596))
425})
426
427test_that("stddev", {
428  data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
429  arr <- Array$create(data)
430  chunked_arr <- ChunkedArray$create(data)
431
432  expect_equal(call_function("stddev", arr, options = list(ddof = 5)), Scalar$create(186))
433  expect_equal(call_function("stddev", chunked_arr, options = list(ddof = 5)), Scalar$create(186))
434})
435