1# Licensed to the Apache Software Foundation (ASF) under one 2# or more contributor license agreements. See the NOTICE file 3# distributed with this work for additional information 4# regarding copyright ownership. The ASF licenses this file 5# to you under the Apache License, Version 2.0 (the 6# "License"); you may not use this file except in compliance 7# with the License. You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, 12# software distributed under the License is distributed on an 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14# KIND, either express or implied. See the License for the 15# specific language governing permissions and limitations 16# under the License. 17 18context("ChunkedArray") 19 20expect_chunked_roundtrip <- function(x, type) { 21 a <- ChunkedArray$create(!!!x) 22 flat_x <- unlist(x, recursive = FALSE) 23 attributes(flat_x) <- attributes(x[[1]]) 24 expect_type_equal(a$type, type) 25 expect_identical(a$num_chunks, length(x)) 26 expect_identical(length(a), length(flat_x)) 27 if (!inherits(type, "ListType")) { 28 # TODO: revisit how missingness works with ListArrays 29 # R list objects don't handle missingness the same way as other vectors. 30 # Is there some vctrs thing we should do on the roundtrip back to R? 31 expect_identical(is.na(a), is.na(flat_x)) 32 } 33 expect_equal(as.vector(a), flat_x) 34 expect_equal(as.vector(a$chunk(0)), x[[1]]) 35 36 if (length(flat_x)) { 37 a_sliced <- a$Slice(1) 38 x_sliced <- flat_x[-1] 39 expect_type_equal(a_sliced$type, type) 40 expect_identical(length(a_sliced), length(x_sliced)) 41 if (!inherits(type, "ListType")) { 42 expect_identical(is.na(a_sliced), is.na(x_sliced)) 43 } 44 expect_equal(as.vector(a_sliced), x_sliced) 45 } 46 invisible(a) 47} 48 49test_that("ChunkedArray", { 50 x <- expect_chunked_roundtrip(list(1:10, 1:10, 1:5), int32()) 51 52 y <- x$Slice(8) 53 expect_equal(y$type, int32()) 54 expect_equal(y$num_chunks, 3L) 55 expect_equal(length(y), 17L) 56 expect_equal(as.vector(y), c(9:10, 1:10, 1:5)) 57 58 z <- x$Slice(8, 5) 59 expect_equal(z$type, int32()) 60 expect_equal(z$num_chunks, 2L) 61 expect_equal(z$length(), 5L) 62 expect_equal(z$as_vector(), c(9:10, 1:3)) 63 64 expect_chunked_roundtrip(list(c(1,2,3), c(4,5,6)), float64()) 65 66 # input validation 67 expect_error(x$chunk(14), "subscript out of bounds") 68 expect_error(x$chunk("one"), class = "Rcpp::not_compatible") 69 expect_error(x$chunk(NA_integer_), "'i' cannot be NA") 70 expect_error(x$chunk(-1), "subscript out of bounds") 71 72 expect_error(x$Slice("ten"), class = "Rcpp::not_compatible") 73 expect_error(x$Slice(NA_integer_), "Slice 'offset' cannot be NA") 74 expect_error(x$Slice(NA), "Slice 'offset' cannot be NA") 75 expect_error(x$Slice(10, "ten"), class = "Rcpp::not_compatible") 76 expect_error(x$Slice(10, NA_integer_), "Slice 'length' cannot be NA") 77 expect_error(x$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA") 78 expect_error(x$Slice(c(10, 10)), class = "Rcpp::not_compatible") 79 expect_error(x$Slice(10, c(10, 10)), class = "Rcpp::not_compatible") 80 expect_error(x$Slice(1000), "Slice 'offset' greater than array length") 81 expect_error(x$Slice(-1), "Slice 'offset' cannot be negative") 82 expect_error(z$Slice(10, 10), "Slice 'offset' greater than array length") 83 expect_error(x$Slice(10, -1), "Slice 'length' cannot be negative") 84 expect_error(x$Slice(-1, 10), "Slice 'offset' cannot be negative") 85 86 expect_warning(x$Slice(10, 15), NA) 87 expect_warning( 88 overslice <- x$Slice(10, 16), 89 "Slice 'length' greater than available length" 90 ) 91 expect_equal(length(overslice), 15) 92 expect_warning(z$Slice(2, 10), "Slice 'length' greater than available length") 93}) 94 95test_that("print ChunkedArray", { 96 x1 <- chunked_array(c(1,2,3), c(4,5,6)) 97 expect_output( 98 print(x1), 99 paste( 100 "ChunkedArray", 101 "<double>", 102 "[", 103 " 1,", 104 " 2,", 105 " 3,", 106 " ...", 107 "]", 108 sep = "\n" 109 ), 110 fixed = TRUE 111 ) 112 x2 <- chunked_array(1:30, c(4,5,6)) 113 expect_output( 114 print(x2), 115 paste( 116 "ChunkedArray", 117 "<int32>", 118 "[", 119 " 1,", 120 " 2,", 121 " 3,", 122 " 4,", 123 " 5,", 124 " 6,", 125 " 7,", 126 " 8,", 127 " 9,", 128 " 10,", 129 " ...", 130 "]", 131 sep = "\n" 132 ), 133 fixed = TRUE 134 ) 135 # If there's only one chunk, it should look like a regular Array 136 x3 <- chunked_array(1:30) 137 expect_output( 138 print(x3), 139 paste0("Chunked", paste(capture.output(print(Array$create(1:30))), collapse = "\n")), 140 fixed = TRUE 141 ) 142}) 143 144test_that("ChunkedArray handles !!! splicing", { 145 data <- list(1, 2, 3) 146 x <- chunked_array(!!!data) 147 expect_equal(x$type, float64()) 148 expect_equal(x$num_chunks, 3L) 149}) 150 151test_that("ChunkedArray handles NA", { 152 data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L)) 153 x <- chunked_array(!!!data) 154 expect_equal(x$type, int32()) 155 expect_equal(x$num_chunks, 3L) 156 expect_equal(length(x), 25L) 157 expect_equal(as.vector(x), c(1:10, c(NA, 2:10), c(1:3, NA, 5))) 158 159 chunks <- x$chunks 160 expect_equal(is.na(chunks[[1]]), is.na(data[[1]])) 161 expect_equal(is.na(chunks[[2]]), is.na(data[[2]])) 162 expect_equal(is.na(chunks[[3]]), is.na(data[[3]])) 163 expect_equal(is.na(x), c(is.na(data[[1]]), is.na(data[[2]]), is.na(data[[3]]))) 164}) 165 166test_that("ChunkedArray supports logical vectors (ARROW-3341)", { 167 # with NA 168 data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE)) 169 expect_chunked_roundtrip(data, bool()) 170 # without NA 171 data <- purrr::rerun(3, sample(c(TRUE, FALSE), 100, replace = TRUE)) 172 expect_chunked_roundtrip(data, bool()) 173}) 174 175test_that("ChunkedArray supports character vectors (ARROW-3339)", { 176 data <- list( 177 c("itsy", NA, "spider"), 178 c("Climbed", "up", "the", "water", "spout"), 179 c("Down", "came", "the", "rain"), 180 "And washed the spider out. " 181 ) 182 expect_chunked_roundtrip(data, utf8()) 183}) 184 185test_that("ChunkedArray supports factors (ARROW-3716)", { 186 f <- factor(c("itsy", "bitsy", "spider", "spider")) 187 expect_chunked_roundtrip(list(f, f, f), dictionary(int8())) 188}) 189 190test_that("ChunkedArray supports dates (ARROW-3716)", { 191 d <- Sys.Date() + 1:10 192 expect_chunked_roundtrip(list(d, d), date32()) 193}) 194 195test_that("ChunkedArray supports POSIXct (ARROW-3716)", { 196 times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 197 expect_chunked_roundtrip(list(times, times), timestamp("us", "UTC")) 198}) 199 200test_that("ChunkedArray supports integer64 (ARROW-3716)", { 201 x <- bit64::as.integer64(1:10) 202 expect_chunked_roundtrip(list(x, x), int64()) 203}) 204 205test_that("ChunkedArray supports difftime", { 206 time <- hms::hms(56, 34, 12) 207 expect_chunked_roundtrip(list(time, time), time32("s")) 208}) 209 210test_that("integer types casts for ChunkedArray (ARROW-3741)", { 211 int_types <- c(int8(), int16(), int32(), int64()) 212 uint_types <- c(uint8(), uint16(), uint32(), uint64()) 213 float_types <- c(float32(), float64()) # float16() not really supported in C++ yet 214 all_types <- c( 215 int_types, 216 uint_types, 217 float_types 218 ) 219 220 a <- chunked_array(1:10, 1:10) 221 for (type in c(int_types, uint_types)) { 222 casted <- a$cast(type) 223 expect_is(casted, "ChunkedArray") 224 expect_type_equal(casted$type, type) 225 } 226 # Also test casting to double(), not actually a type, a base R function but should be alias for float64 227 dbl <- a$cast(double()) 228 expect_is(dbl, "ChunkedArray") 229 expect_type_equal(dbl$type, float64()) 230}) 231 232test_that("chunked_array() supports the type= argument. conversion from INTSXP and int64 to all int types", { 233 num_int32 <- 12L 234 num_int64 <- bit64::as.integer64(10) 235 for (type in all_types) { 236 expect_type_equal(chunked_array(num_int32, type = type)$type, type) 237 expect_type_equal(chunked_array(num_int64, type = type)$type, type) 238 } 239 # also test creating with double() "type" 240 expect_type_equal(chunked_array(num_int32, type = double())$type, float64()) 241}) 242 243test_that("ChunkedArray$create() aborts on overflow", { 244 expect_error(chunked_array(128L, type = int8())$type, "Invalid.*Value is too large") 245 expect_error(chunked_array(-129L, type = int8())$type, "Invalid.*Value is too large") 246 247 expect_error(chunked_array(256L, type = uint8())$type, "Invalid.*Value is too large") 248 expect_error(chunked_array(-1L, type = uint8())$type, "Invalid.*Value is too large") 249 250 expect_error(chunked_array(32768L, type = int16())$type, "Invalid.*Value is too large") 251 expect_error(chunked_array(-32769L, type = int16())$type, "Invalid.*Value is too large") 252 253 expect_error(chunked_array(65536L, type = uint16())$type, "Invalid.*Value is too large") 254 expect_error(chunked_array(-1L, type = uint16())$type, "Invalid.*Value is too large") 255 256 expect_error(chunked_array(65536L, type = uint16())$type, "Invalid.*Value is too large") 257 expect_error(chunked_array(-1L, type = uint16())$type, "Invalid.*Value is too large") 258 259 expect_error(chunked_array(bit64::as.integer64(2^31), type = int32()), "Invalid.*Value is too large") 260 expect_error(chunked_array(bit64::as.integer64(2^32), type = uint32()), "Invalid.*Value is too large") 261}) 262 263test_that("chunked_array() convert doubles to integers", { 264 for (type in c(int_types, uint_types)) { 265 a <- chunked_array(10, type = type) 266 expect_type_equal(a$type, type) 267 if (type != uint64()) { 268 # exception for unsigned integer 64 that 269 # wa cannot handle yet 270 expect_true(as.vector(a) == 10) 271 } 272 } 273}) 274 275test_that("chunked_array() uses the first ... to infer type", { 276 a <- chunked_array(10, 10L) 277 expect_type_equal(a$type, float64()) 278}) 279 280test_that("chunked_array() handles downcasting", { 281 a <- chunked_array(10L, 10) 282 expect_type_equal(a$type, int32()) 283 expect_equal(as.vector(a), c(10L, 10L)) 284}) 285 286test_that("chunked_array() makes chunks of the same type", { 287 a <- chunked_array(10L, bit64::as.integer64(13), type = int64()) 288 for(chunk in a$chunks) { 289 expect_type_equal(chunk$type, int64()) 290 } 291}) 292 293test_that("chunked_array() handles 0 chunks if given a type", { 294 for (type in all_types) { 295 a <- chunked_array(type = type) 296 expect_type_equal(a$type, as_type(type)) 297 expect_equal(length(a), 0L) 298 } 299}) 300 301test_that("chunked_array() can ingest arrays (ARROW-3815)", { 302 expect_equal( 303 as.vector(chunked_array(1:5, Array$create(6:10))), 304 1:10 305 ) 306}) 307 308test_that("chunked_array() handles data frame -> struct arrays (ARROW-3811)", { 309 df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10]) 310 a <- chunked_array(df, df) 311 expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8())) 312 expect_equivalent(a$as_vector(), rbind(df, df)) 313}) 314 315test_that("ChunkedArray$View() (ARROW-6542)", { 316 a <- ChunkedArray$create(1:3, 1:4) 317 b <- a$View(float32()) 318 expect_equal(b$type, float32()) 319 expect_equal(length(b), 7L) 320 expect_true(all( 321 sapply(b$chunks, function(.x) .x$type == float32()) 322 )) 323 # Input validation 324 expect_error(a$View("not a type"), "type must be a DataType, not character") 325}) 326 327test_that("ChunkedArray$Validate()", { 328 a <- ChunkedArray$create(1:10) 329 expect_error(a$Validate(), NA) 330}) 331 332test_that("[ ChunkedArray", { 333 one_chunk <- chunked_array(2:11) 334 x <- chunked_array(1:10, 31:40, 51:55) 335 # Slice 336 expect_vector(x[8:12], c(8:10, 31:32)) 337 # Take from same chunk 338 expect_vector(x[c(11, 15, 12)], c(31, 35, 32)) 339 # Take from multiple chunks (calls Concatenate) 340 expect_vector(x[c(2, 11, 15, 12, 3)], c(2, 31, 35, 32, 3)) 341 # Take with Array (note these are 0-based) 342 take1 <- Array$create(c(10L, 14L, 11L)) 343 expect_vector(x[take1], c(31, 35, 32)) 344 # Take with ChunkedArray 345 take2 <- ChunkedArray$create(c(10L, 14L), 11L) 346 expect_vector(x[take2], c(31, 35, 32)) 347 348 # Filter (with recycling) 349 expect_vector( 350 one_chunk[c(FALSE, TRUE, FALSE, FALSE, TRUE)], 351 c(3, 6, 8, 11) 352 ) 353 # Filter where both are 1-chunk 354 expect_vector( 355 one_chunk[ChunkedArray$create(rep(c(FALSE, TRUE, FALSE, FALSE, TRUE), 2))], 356 c(3, 6, 8, 11) 357 ) 358 # Filter multi-chunk with logical (-> Array) 359 expect_vector( 360 x[c(FALSE, TRUE, FALSE, FALSE, TRUE)], 361 c(2, 5, 7, 10, 32, 35, 37, 40, 52, 55) 362 ) 363 # Filter with a chunked array with different sized chunks 364 p1 <- c(FALSE, TRUE, FALSE, FALSE, TRUE) 365 p2 <- c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE) 366 filt <- ChunkedArray$create(p1, p2, p2) 367 expect_vector( 368 x[filt], 369 c(2, 5, 6, 8, 9, 35, 36, 38, 39, 55) 370 ) 371}) 372 373test_that("ChunkedArray head/tail", { 374 vec <- 11:20 375 a <- ChunkedArray$create(11:15, 16:20) 376 expect_vector(head(a), head(vec)) 377 expect_vector(head(a, 4), head(vec, 4)) 378 expect_vector(head(a, 40), head(vec, 40)) 379 expect_vector(head(a, -4), head(vec, -4)) 380 expect_vector(head(a, -40), head(vec, -40)) 381 expect_vector(tail(a), tail(vec)) 382 expect_vector(tail(a, 4), tail(vec, 4)) 383 expect_vector(tail(a, 40), tail(vec, 40)) 384 expect_vector(tail(a, -40), tail(vec, -40)) 385}) 386 387test_that("ChunkedArray$Equals", { 388 vec <- 11:20 389 a <- ChunkedArray$create(vec[1:5], vec[6:10]) 390 b <- ChunkedArray$create(vec[1:5], vec[6:10]) 391 expect_equal(a, b) 392 expect_true(a$Equals(b)) 393 expect_false(a$Equals(vec)) 394}) 395