# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for building, printing and evaluating arrow_dplyr_query objects.

skip_if_not_available("dataset")

library(dplyr, warn.conflicts = FALSE)
library(stringr)

# Shared fixture: the example data plus a few extra string columns.
tbl <- example_data
# Add some better string data
tbl$verses <- verses[[1]]
# c(" a ", "  b  ", "   c   ", ...) increasing padding
# nchar =   3  5  7  9 11 13 15 17 19 21
tbl$padded_strings <- stringr::str_pad(
  letters[1:10],
  width = 2 * (1:10) + 1,
  side = "both"
)
tbl$another_chr <- tail(letters, 10)

test_that("basic select/filter/collect", {
  batch <- record_batch(tbl)

  b2 <- batch %>%
    select(int, chr) %>%
    filter(int > 5)

  # The verbs only build a query object; collect() evaluates it.
  expect_s3_class(b2, "arrow_dplyr_query")
  t2 <- collect(b2)
  # Rows where int is NA are excluded from the expected result.
  expect_equal(t2, tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")])
  # Test that the original object is not affected
  expect_identical(collect(batch), tbl)
})

test_that("dim() on query", {
  compare_dplyr_binding(
    .input %>%
      filter(int > 5) %>%
      select(int, chr) %>%
      dim(),
    tbl
  )
})

test_that("Print method", {
  # The expected text is a multi-line literal matched verbatim (fixed = TRUE),
  # so its line breaks and the blank line are significant.
  expect_output(
    record_batch(tbl) %>%
      filter(dbl > 2, chr == "d" | chr == "f") %>%
      select(chr, int, lgl) %>%
      filter(int < 5) %>%
      select(int, chr) %>%
      print(),
    'InMemoryDataset (query)
int: int32
chr: string

* Filter: (((dbl > 2) and ((chr == "d") or (chr == "f"))) and (int < 5))
See $.data for the source Arrow object',
    fixed = TRUE
  )
})

test_that("pull", {
  # Default column
  compare_dplyr_binding(
    .input %>% pull(),
    tbl
  )
  # Column by position
  compare_dplyr_binding(
    .input %>% pull(1),
    tbl
  )
  # Column by name
  compare_dplyr_binding(
    .input %>% pull(chr),
    tbl
  )
  # Column by a name introduced earlier in the pipeline
  compare_dplyr_binding(
    .input %>%
      filter(int > 4) %>%
      rename(strng = chr) %>%
      pull(strng),
    tbl
  )
})

test_that("collect(as_data_frame=FALSE)", {
  batch <- record_batch(tbl)

  # With no query applied, the RecordBatch comes back as a RecordBatch.
  b1 <- batch %>% collect(as_data_frame = FALSE)

  expect_r6_class(b1, "RecordBatch")

  b2 <- batch %>%
    select(int, chr) %>%
    filter(int > 5) %>%
    collect(as_data_frame = FALSE)

  # collect(as_data_frame = FALSE) always returns Table now
  expect_r6_class(b2, "Table")
  expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
  expect_equal(as.data.frame(b2), expected)

  # Renaming inside select() is reflected in the result.
  b3 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    collect(as_data_frame = FALSE)
  expect_r6_class(b3, "Table")
  expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))

  # A grouped query is expected to come back as an arrow_dplyr_query, and
  # converting it to a data frame must match the grouped expected data.
  b4 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    group_by(int) %>%
    collect(as_data_frame = FALSE)
  expect_s3_class(b4, "arrow_dplyr_query")
  expect_equal(
    as.data.frame(b4),
    expected %>%
      rename(strng = chr) %>%
      group_by(int)
  )
})

test_that("compute()", {
  batch <- record_batch(tbl)

  # Mirrors the collect(as_data_frame = FALSE) expectations above.
  b1 <- batch %>% compute()

  expect_r6_class(b1, "RecordBatch")

  b2 <- batch %>%
    select(int, chr) %>%
    filter(int > 5) %>%
    compute()

  expect_r6_class(b2, "Table")
  expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
  expect_equal(as.data.frame(b2), expected)

  b3 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    compute()
  expect_r6_class(b3, "Table")
  expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))

  b4 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    group_by(int) %>%
    compute()
  expect_s3_class(b4, "arrow_dplyr_query")
  expect_equal(
    as.data.frame(b4),
    expected %>%
      rename(strng = chr) %>%
      group_by(int)
  )
})

test_that("head", {
  batch <- record_batch(tbl)

  # head() on a query yields another query rather than data.
  b2 <- batch %>%
    select(int, chr) %>%
    filter(int > 5) %>%
    head(2)
  expect_s3_class(b2, "arrow_dplyr_query")
  expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")][1:2, ]
  expect_equal(collect(b2), expected)

  b3 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    head(2)
  expect_s3_class(b3, "arrow_dplyr_query")
  expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))

  b4 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    group_by(int) %>%
    head(2)
  expect_s3_class(b4, "arrow_dplyr_query")
  expect_equal(
    as.data.frame(b4),
    expected %>%
      rename(strng = chr) %>%
      group_by(int)
  )

  # Verbs applied after head() operate on the truncated rows.
  expect_equal(
    batch %>%
      select(int, strng = chr) %>%
      filter(int > 5) %>%
      head(2) %>%
      mutate(twice = int * 2) %>%
      collect(),
    expected %>%
      rename(strng = chr) %>%
      mutate(twice = int * 2)
  )

  # This would fail if we evaluated head() after filter()
  # (the expected result is a zero-row data frame with the same columns)
  expect_equal(
    batch %>%
      select(int, strng = chr) %>%
      head(2) %>%
      filter(int > 5) %>%
      collect(),
    expected %>%
      rename(strng = chr) %>%
      filter(FALSE)
  )
})

test_that("arrange then head returns the right data (ARROW-14162)", {
  compare_dplyr_binding(
    .input %>%
      # mpg has ties so we need to sort by two things to get deterministic order
      arrange(mpg, disp) %>%
      head(4) %>%
      collect(),
    mtcars,
    ignore_attr = "row.names"
  )
})

test_that("arrange then tail returns the right data", {
  compare_dplyr_binding(
    .input %>%
      # mpg has ties so we need to sort by two things to get deterministic order
      arrange(mpg, disp) %>%
      tail(4) %>%
      collect(),
    mtcars,
    ignore_attr = "row.names"
  )
})

test_that("tail", {
  batch <- record_batch(tbl)

  # Each pipeline sorts with arrange(int) before taking the tail.
  b2 <- batch %>%
    select(int, chr) %>%
    filter(int > 5) %>%
    arrange(int) %>%
    tail(2)

  expect_s3_class(b2, "arrow_dplyr_query")
  expected <- tail(tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")], 2)
  expect_equal(as.data.frame(b2), expected)

  b3 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    arrange(int) %>%
    tail(2)
  expect_s3_class(b3, "arrow_dplyr_query")
  expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))

  b4 <- batch %>%
    select(int, strng = chr) %>%
    filter(int > 5) %>%
    group_by(int) %>%
    arrange(int) %>%
    tail(2)
  expect_s3_class(b4, "arrow_dplyr_query")
  expect_equal(
    as.data.frame(b4),
    expected %>%
      rename(strng = chr) %>%
      group_by(int)
  )
})

test_that("No duplicate field names are allowed in an arrow_dplyr_query", {
  # Table$create(tbl, tbl) repeats every column name, so building a query on
  # it must fail and name the duplicated fields.
  expect_error(
    Table$create(tbl, tbl) %>%
      filter(int > 0),
    regexp = paste0(
      'The following field names were found more than once in the data: "int", "dbl", ',
      '"dbl2", "lgl", "false", "chr", "fct", "verses", "padded_strings"'
    )
  )
})