1# Licensed to the Apache Software Foundation (ASF) under one 2# or more contributor license agreements. See the NOTICE file 3# distributed with this work for additional information 4# regarding copyright ownership. The ASF licenses this file 5# to you under the Apache License, Version 2.0 (the 6# "License"); you may not use this file except in compliance 7# with the License. You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, 12# software distributed under the License is distributed on an 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14# KIND, either express or implied. See the License for the 15# specific language governing permissions and limitations 16# under the License. 17 18test_that("list_compute_functions", { 19 allfuncs <- list_compute_functions() 20 expect_false(all(grepl("min", allfuncs))) 21 justmins <- list_compute_functions("^min") 22 expect_true(length(justmins) > 0) 23 expect_true(all(grepl("min", justmins))) 24 no_hash_funcs <- list_compute_functions("^hash") 25 expect_true(length(no_hash_funcs) == 0) 26}) 27 28test_that("sum.Array", { 29 ints <- 1:5 30 a <- Array$create(ints) 31 expect_r6_class(sum(a), "Scalar") 32 expect_identical(as.integer(sum(a)), sum(ints)) 33 34 floats <- c(1.3, 2.4, 3) 35 f <- Array$create(floats) 36 expect_identical(as.numeric(sum(f)), sum(floats)) 37 38 floats <- c(floats, NA) 39 na <- Array$create(floats) 40 if (!grepl("devel", R.version.string)) { 41 # Valgrind on R-devel confuses NaN and NA_real_ 42 # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html 43 expect_identical(as.numeric(sum(na)), sum(floats)) 44 } 45 expect_r6_class(sum(na, na.rm = TRUE), "Scalar") 46 expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE)) 47 48 bools <- c(TRUE, NA, TRUE, FALSE) 49 b <- Array$create(bools) 50 expect_identical(as.integer(sum(b)), sum(bools)) 51 expect_identical(as.integer(sum(b, na.rm = TRUE)), sum(bools, na.rm = TRUE)) 52}) 53 54test_that("sum.ChunkedArray", { 55 a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) 56 expect_r6_class(sum(a), "Scalar") 57 expect_true(is.na(as.vector(sum(a)))) 58 expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35) 59}) 60 61test_that("sum dots", { 62 a1 <- Array$create(1:4) 63 a2 <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) 64 expect_identical(as.numeric(sum(a1, a2, na.rm = TRUE)), 45) 65}) 66 67test_that("sum.Scalar", { 68 s <- Scalar$create(4) 69 expect_identical(as.numeric(s), as.numeric(sum(s))) 70}) 71 72test_that("mean.Array", { 73 ints <- 1:4 74 a <- Array$create(ints) 75 expect_r6_class(mean(a), "Scalar") 76 expect_identical(as.vector(mean(a)), mean(ints)) 77 78 floats <- c(1.3, 2.4, 3) 79 f <- Array$create(floats) 80 expect_identical(as.vector(mean(f)), mean(floats)) 81 82 floats <- c(floats, NA) 83 na <- Array$create(floats) 84 if (!grepl("devel", R.version.string)) { 85 # Valgrind on R-devel confuses NaN and NA_real_ 86 # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html 87 expect_identical(as.vector(mean(na)), mean(floats)) 88 } 89 expect_r6_class(mean(na, na.rm = TRUE), "Scalar") 90 expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE)) 91 92 bools <- c(TRUE, NA, TRUE, FALSE) 93 b <- Array$create(bools) 94 expect_identical(as.vector(mean(b)), mean(bools)) 95 expect_identical(as.integer(sum(b, na.rm = TRUE)), sum(bools, na.rm = TRUE)) 96}) 97 98test_that("mean.ChunkedArray", { 99 a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5) 100 expect_r6_class(mean(a), "Scalar") 101 expect_true(is.na(as.vector(mean(a)))) 102 expect_identical(as.vector(mean(a, na.rm = TRUE)), 35 / 13) 103}) 104 105test_that("mean.Scalar", { 106 s <- Scalar$create(4) 107 expect_equal(s, mean(s)) 108}) 109 110test_that("Bad input handling of call_function", { 111 expect_error( 112 call_function("sum", 2, 3), 113 'Argument 1 is of class numeric but it must be one of "Array", "ChunkedArray", "RecordBatch", "Table", or "Scalar"' 114 ) 115}) 116 117test_that("min.Array", { 118 ints <- 1:4 119 a <- Array$create(ints) 120 expect_r6_class(min(a), "Scalar") 121 expect_identical(as.vector(min(a)), min(ints)) 122 123 floats <- c(1.3, 3, 2.4) 124 f <- Array$create(floats) 125 expect_identical(as.vector(min(f)), min(floats)) 126 127 floats <- c(floats, NA) 128 na <- Array$create(floats) 129 expect_identical(as.vector(min(na)), min(floats)) 130 expect_r6_class(min(na, na.rm = TRUE), "Scalar") 131 expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE)) 132 133 bools <- c(TRUE, TRUE, FALSE) 134 b <- Array$create(bools) 135 # R is inconsistent here: typeof(min(NA)) == "integer", not "logical" 136 expect_identical(as.vector(min(b)), as.logical(min(bools))) 137}) 138 139test_that("max.Array", { 140 ints <- 1:4 141 a <- Array$create(ints) 142 expect_r6_class(max(a), "Scalar") 143 expect_identical(as.vector(max(a)), max(ints)) 144 145 floats <- c(1.3, 3, 2.4) 146 f <- Array$create(floats) 147 expect_identical(as.vector(max(f)), max(floats)) 148 149 floats <- c(floats, NA) 150 na <- Array$create(floats) 151 expect_identical(as.vector(max(na)), max(floats)) 152 expect_r6_class(max(na, na.rm = TRUE), "Scalar") 153 expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE)) 154 155 bools <- c(TRUE, TRUE, FALSE) 156 b <- Array$create(bools) 157 # R is inconsistent here: typeof(max(NA)) == "integer", not "logical" 158 expect_identical(as.vector(max(b)), as.logical(max(bools))) 159}) 160 161test_that("min.ChunkedArray", { 162 ints <- 1:4 163 a <- ChunkedArray$create(ints) 164 expect_r6_class(min(a), "Scalar") 165 expect_identical(as.vector(min(a)), min(ints)) 166 167 floats <- c(1.3, 3, 2.4) 168 f <- ChunkedArray$create(floats) 169 expect_identical(as.vector(min(f)), min(floats)) 170 171 floats <- c(floats, NA) 172 na <- ChunkedArray$create(floats) 173 expect_identical(as.vector(min(na)), min(floats)) 174 expect_r6_class(min(na, na.rm = TRUE), "Scalar") 175 expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE)) 176 177 bools <- c(TRUE, TRUE, FALSE) 178 b <- ChunkedArray$create(bools) 179 # R is inconsistent here: typeof(min(NA)) == "integer", not "logical" 180 expect_identical(as.vector(min(b)), as.logical(min(bools))) 181}) 182 183test_that("max.ChunkedArray", { 184 ints <- 1:4 185 a <- ChunkedArray$create(ints) 186 expect_r6_class(max(a), "Scalar") 187 expect_identical(as.vector(max(a)), max(ints)) 188 189 floats <- c(1.3, 3, 2.4) 190 f <- ChunkedArray$create(floats) 191 expect_identical(as.vector(max(f)), max(floats)) 192 193 floats <- c(floats, NA) 194 na <- ChunkedArray$create(floats) 195 expect_identical(as.vector(max(na)), max(floats)) 196 expect_r6_class(max(na, na.rm = TRUE), "Scalar") 197 expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE)) 198 199 bools <- c(TRUE, TRUE, FALSE) 200 b <- ChunkedArray$create(bools) 201 # R is inconsistent here: typeof(max(NA)) == "integer", not "logical" 202 expect_identical(as.vector(max(b)), as.logical(max(bools))) 203}) 204 205test_that("Edge cases", { 206 a <- Array$create(NA) 207 for (type in c(int32(), float64(), bool())) { 208 expect_as_vector(sum(a$cast(type), na.rm = TRUE), sum(NA, na.rm = TRUE)) 209 expect_as_vector(mean(a$cast(type), na.rm = TRUE), mean(NA, na.rm = TRUE)) 210 expect_as_vector( 211 min(a$cast(type), na.rm = TRUE), 212 # Suppress the base R warning about no non-missing arguments 213 suppressWarnings(min(NA, na.rm = TRUE)) 214 ) 215 expect_as_vector( 216 max(a$cast(type), na.rm = TRUE), 217 suppressWarnings(max(NA, na.rm = TRUE)) 218 ) 219 } 220}) 221 222test_that("quantile.Array and quantile.ChunkedArray", { 223 a <- Array$create(c(0, 1, 2, 3)) 224 ca <- ChunkedArray$create(c(0, 1), c(2, 3)) 225 probs <- c(0.49, 0.51) 226 for (ad in list(a, ca)) { 227 for (type in c(int32(), uint64(), float64())) { 228 expect_equal( 229 quantile(ad$cast(type), probs = probs, interpolation = "linear"), 230 Array$create(c(1.47, 1.53)) 231 ) 232 expect_equal( 233 quantile(ad$cast(type), probs = probs, interpolation = "lower"), 234 Array$create(c(1, 1))$cast(type) 235 ) 236 expect_equal( 237 quantile(ad$cast(type), probs = probs, interpolation = "higher"), 238 Array$create(c(2, 2))$cast(type) 239 ) 240 expect_equal( 241 quantile(ad$cast(type), probs = probs, interpolation = "nearest"), 242 Array$create(c(1, 2))$cast(type) 243 ) 244 expect_equal( 245 quantile(ad$cast(type), probs = probs, interpolation = "midpoint"), 246 Array$create(c(1.5, 1.5)) 247 ) 248 } 249 } 250}) 251 252test_that("quantile and median NAs, edge cases, and exceptions", { 253 expect_equal( 254 quantile(Array$create(c(1, 2)), probs = c(0, 1)), 255 Array$create(c(1, 2)) 256 ) 257 expect_error( 258 quantile(Array$create(c(1, 2, NA))), 259 "Missing values not allowed if 'na.rm' is FALSE" 260 ) 261 expect_equal( 262 quantile(Array$create(numeric(0))), 263 Array$create(rep(NA_real_, 5)) 264 ) 265 expect_equal( 266 quantile(Array$create(rep(NA_integer_, 3)), na.rm = TRUE), 267 Array$create(rep(NA_real_, 5)) 268 ) 269 expect_equal( 270 quantile(Scalar$create(0L)), 271 Array$create(rep(0, 5)) 272 ) 273 expect_equal( 274 median(Scalar$create(1L)), 275 Scalar$create(1) 276 ) 277 expect_error( 278 quantile(Array$create(1:3), type = 9), 279 "not supported" 280 ) 281}) 282 283test_that("median passes ... args to quantile", { 284 skip_if( 285 !"..." %in% names(formals(median)), 286 "The median generic lacks dots in R 3.3.0 and earlier" 287 ) 288 expect_equal( 289 median(Array$create(c(1, 2)), interpolation = "higher"), 290 Scalar$create(2) 291 ) 292 expect_error( 293 median(Array$create(c(1, 2)), probs = c(.25, .75)) 294 ) 295}) 296 297test_that("median.Array and median.ChunkedArray", { 298 compare_expression( 299 median(.input), 300 1:4 301 ) 302 compare_expression( 303 median(.input), 304 1:5 305 ) 306 compare_expression( 307 median(.input), 308 numeric(0) 309 ) 310 compare_expression( 311 median(.input, na.rm = FALSE), 312 c(1, 2, NA) 313 ) 314 compare_expression( 315 median(.input, na.rm = TRUE), 316 c(1, 2, NA) 317 ) 318 compare_expression( 319 median(.input, na.rm = TRUE), 320 NA_real_ 321 ) 322 compare_expression( 323 median(.input, na.rm = FALSE), 324 c(1, 2, NA) 325 ) 326 compare_expression( 327 median(.input, na.rm = TRUE), 328 c(1, 2, NA) 329 ) 330 compare_expression( 331 median(.input, na.rm = TRUE), 332 NA_real_ 333 ) 334}) 335 336test_that("unique.Array", { 337 a <- Array$create(c(1, 4, 3, 1, 1, 3, 4)) 338 expect_equal(unique(a), Array$create(c(1, 4, 3))) 339 ca <- ChunkedArray$create(a, a) 340 expect_equal(unique(ca), Array$create(c(1, 4, 3))) 341}) 342 343test_that("match_arrow", { 344 a <- Array$create(c(1, 4, 3, 1, 1, 3, 4)) 345 tab <- c(4, 3, 2, 1) 346 expect_equal(match_arrow(a, tab), Array$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L))) 347 348 ca <- ChunkedArray$create(c(1, 4, 3, 1, 1, 3, 4)) 349 expect_equal(match_arrow(ca, tab), ChunkedArray$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L))) 350 351 sc <- Scalar$create(3) 352 expect_equal(match_arrow(sc, tab), Scalar$create(1L)) 353 354 vec <- c(1, 2) 355 expect_equal(match_arrow(vec, tab), Array$create(c(3L, 2L))) 356}) 357 358test_that("is_in", { 359 a <- Array$create(c(9, 4, 3)) 360 tab <- c(4, 3, 2, 1) 361 expect_equal(is_in(a, tab), Array$create(c(FALSE, TRUE, TRUE))) 362 363 ca <- ChunkedArray$create(c(9, 4, 3)) 364 expect_equal(is_in(ca, tab), ChunkedArray$create(c(FALSE, TRUE, TRUE))) 365 366 sc <- Scalar$create(3) 367 expect_equal(is_in(sc, tab), Scalar$create(TRUE)) 368 369 vec <- c(1, 9) 370 expect_equal(is_in(vec, tab), Array$create(c(TRUE, FALSE))) 371}) 372 373test_that("value_counts", { 374 a <- Array$create(c(1, 4, 3, 1, 1, 3, 4)) 375 result_df <- tibble::tibble( 376 values = c(1, 4, 3), 377 counts = c(3L, 2L, 2L) 378 ) 379 result <- Array$create( 380 result_df, 381 type = struct(values = float64(), counts = int64()) 382 ) 383 expect_equal(value_counts(a), result) 384 expect_identical(as.data.frame(value_counts(a)), result_df) 385 expect_identical(as.vector(value_counts(a)$counts), result_df$counts) 386}) 387 388test_that("any.Array and any.ChunkedArray", { 389 data <- c(1:10, NA, NA) 390 391 compare_expression(any(.input > 5), data) 392 compare_expression(any(.input > 5, na.rm = TRUE), data) 393 compare_expression(any(.input < 1), data) 394 compare_expression(any(.input < 1, na.rm = TRUE), data) 395 396 data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) 397 398 compare_expression(any(.input), data_logical) 399 compare_expression(any(.input, na.rm = FALSE), data_logical) 400 compare_expression(any(.input, na.rm = TRUE), data_logical) 401}) 402 403test_that("all.Array and all.ChunkedArray", { 404 data <- c(1:10, NA, NA) 405 406 compare_expression(all(.input > 5), data) 407 compare_expression(all(.input > 5, na.rm = TRUE), data) 408 409 compare_expression(all(.input < 11), data) 410 compare_expression(all(.input < 11, na.rm = TRUE), data) 411 412 data_logical <- c(TRUE, TRUE, NA) 413 414 compare_expression(all(.input), data_logical) 415 compare_expression(all(.input, na.rm = TRUE), data_logical) 416}) 417 418test_that("variance", { 419 data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA) 420 arr <- Array$create(data) 421 chunked_arr <- ChunkedArray$create(data) 422 423 expect_equal(call_function("variance", arr, options = list(ddof = 5)), Scalar$create(34596)) 424 expect_equal(call_function("variance", chunked_arr, options = list(ddof = 5)), Scalar$create(34596)) 425}) 426 427test_that("stddev", { 428 data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA) 429 arr <- Array$create(data) 430 chunked_arr <- ChunkedArray$create(data) 431 432 expect_equal(call_function("stddev", arr, options = list(ddof = 5)), Scalar$create(186)) 433 expect_equal(call_function("stddev", chunked_arr, options = list(ddof = 5)), Scalar$create(186)) 434}) 435