1# Licensed to the Apache Software Foundation (ASF) under one 2# or more contributor license agreements. See the NOTICE file 3# distributed with this work for additional information 4# regarding copyright ownership. The ASF licenses this file 5# to you under the Apache License, Version 2.0 (the 6# "License"); you may not use this file except in compliance 7# with the License. You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, 12# software distributed under the License is distributed on an 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14# KIND, either express or implied. See the License for the 15# specific language governing permissions and limitations 16# under the License. 17 18#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail 19#' @importFrom R6 R6Class 20#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr 21#' @importFrom assertthat assert_that is.string 22#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos 23#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec 24#' @importFrom rlang is_bare_character quo_get_expr quo_get_env quo_set_expr .data seq2 is_interactive 25#' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure 26#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select 27#' @useDynLib arrow, .registration = TRUE 28#' @keywords internal 29"_PACKAGE" 30 31#' @importFrom vctrs s3_register vec_size vec_cast vec_unique 32.onLoad <- function(...) { 33 dplyr_methods <- paste0( 34 "dplyr::", 35 c( 36 "select", "filter", "collect", "summarise", "group_by", "groups", 37 "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute", 38 "arrange", "rename", "pull", "relocate", "compute", "collapse", 39 "distinct", "left_join", "right_join", "inner_join", "full_join", 40 "semi_join", "anti_join", "count", "tally" 41 ) 42 ) 43 for (cl in c("Dataset", "ArrowTabular", "arrow_dplyr_query")) { 44 for (m in dplyr_methods) { 45 s3_register(m, cl) 46 } 47 } 48 s3_register("dplyr::tbl_vars", "arrow_dplyr_query") 49 50 for (cl in c( 51 "Array", "RecordBatch", "ChunkedArray", "Table", "Schema", 52 "Field", "DataType", "RecordBatchReader" 53 )) { 54 s3_register("reticulate::py_to_r", paste0("pyarrow.lib.", cl)) 55 s3_register("reticulate::r_to_py", cl) 56 } 57 58 # Create these once, at package build time 59 if (arrow_available()) { 60 # Also include all available Arrow Compute functions, 61 # namespaced as arrow_fun. 62 # We can't do this at install time because list_compute_functions() may error 63 all_arrow_funs <- list_compute_functions() 64 arrow_funcs <- set_names( 65 lapply(all_arrow_funs, function(fun) { 66 force(fun) 67 function(...) build_expr(fun, ...) 68 }), 69 paste0("arrow_", all_arrow_funs) 70 ) 71 .cache$functions <- c(nse_funcs, arrow_funcs) 72 } 73 74 if (tolower(Sys.info()[["sysname"]]) == "windows") { 75 # Disable multithreading on Windows 76 # See https://issues.apache.org/jira/browse/ARROW-8379 77 options(arrow.use_threads = FALSE) 78 } 79 80 invisible() 81} 82 83.onAttach <- function(libname, pkgname) { 84 if (!arrow_available()) { 85 msg <- paste( 86 "The Arrow C++ library is not available. To retry installation with debug output, run:", 87 " install_arrow(verbose = TRUE)", 88 "See https://arrow.apache.org/docs/r/articles/install.html for more guidance and troubleshooting.", 89 sep = "\n" 90 ) 91 packageStartupMessage(msg) 92 } else { 93 # Just to be extra safe, let's wrap this in a try(); 94 # we don't a failed startup message to prevent the package from loading 95 try({ 96 features <- arrow_info()$capabilities 97 # That has all of the #ifdef features, plus the compression libs and the 98 # string libraries (but not the memory allocators, they're added elsewhere) 99 # 100 # Let's print a message if some are off 101 if (some_features_are_off(features)) { 102 packageStartupMessage("See arrow_info() for available features") 103 } 104 }) 105 } 106} 107 108#' Is the C++ Arrow library available? 109#' 110#' You won't generally need to call these function, but they're made available 111#' for diagnostic purposes. 112#' @return `TRUE` or `FALSE` depending on whether the package was installed 113#' with: 114#' * The Arrow C++ library (check with `arrow_available()`) 115#' * Arrow Dataset support enabled (check with `arrow_with_dataset()`) 116#' * Parquet support enabled (check with `arrow_with_parquet()`) 117#' * JSON support enabled (check with `arrow_with_json()`) 118#' * Amazon S3 support enabled (check with `arrow_with_s3()`) 119#' @export 120#' @examples 121#' arrow_available() 122#' arrow_with_dataset() 123#' arrow_with_parquet() 124#' arrow_with_json() 125#' arrow_with_s3() 126#' @seealso If any of these are `FALSE`, see 127#' `vignette("install", package = "arrow")` for guidance on reinstalling the 128#' package. 129arrow_available <- function() { 130 tryCatch(.Call(`_arrow_available`), error = function(e) { 131 return(FALSE) 132 }) 133} 134 135#' @rdname arrow_available 136#' @export 137arrow_with_dataset <- function() { 138 is_32bit <- .Machine$sizeof.pointer < 8 139 is_old_r <- getRversion() < "4.0.0" 140 is_windows <- tolower(Sys.info()[["sysname"]]) == "windows" 141 if (is_32bit && is_old_r && is_windows) { 142 # 32-bit rtools 3.5 does not properly implement the std::thread expectations 143 # but we can't just disable ARROW_DATASET in that build, 144 # so report it as "off" here. 145 return(FALSE) 146 } 147 tryCatch(.Call(`_dataset_available`), error = function(e) { 148 return(FALSE) 149 }) 150} 151 152#' @rdname arrow_available 153#' @export 154arrow_with_parquet <- function() { 155 tryCatch(.Call(`_parquet_available`), error = function(e) { 156 return(FALSE) 157 }) 158} 159 160#' @rdname arrow_available 161#' @export 162arrow_with_s3 <- function() { 163 tryCatch(.Call(`_s3_available`), error = function(e) { 164 return(FALSE) 165 }) 166} 167 168#' @rdname arrow_available 169#' @export 170arrow_with_json <- function() { 171 tryCatch(.Call(`_json_available`), error = function(e) { 172 return(FALSE) 173 }) 174} 175 176option_use_threads <- function() { 177 !is_false(getOption("arrow.use_threads")) 178} 179 180#' Report information on the package's capabilities 181#' 182#' This function summarizes a number of build-time configurations and run-time 183#' settings for the Arrow package. It may be useful for diagnostics. 184#' @return A list including version information, boolean "capabilities", and 185#' statistics from Arrow's memory allocator, and also Arrow's run-time 186#' information. 187#' @export 188#' @importFrom utils packageVersion 189arrow_info <- function() { 190 opts <- options() 191 out <- list( 192 version = packageVersion("arrow"), 193 libarrow = arrow_available(), 194 options = opts[grep("^arrow\\.", names(opts))] 195 ) 196 if (out$libarrow) { 197 pool <- default_memory_pool() 198 runtimeinfo <- runtime_info() 199 buildinfo <- build_info() 200 compute_funcs <- list_compute_functions() 201 out <- c(out, list( 202 capabilities = c( 203 dataset = arrow_with_dataset(), 204 parquet = arrow_with_parquet(), 205 json = arrow_with_json(), 206 s3 = arrow_with_s3(), 207 utf8proc = "utf8_upper" %in% compute_funcs, 208 re2 = "replace_substring_regex" %in% compute_funcs, 209 vapply(tolower(names(CompressionType)[-1]), codec_is_available, logical(1)) 210 ), 211 memory_pool = list( 212 backend_name = pool$backend_name, 213 bytes_allocated = pool$bytes_allocated, 214 max_memory = pool$max_memory, 215 available_backends = supported_memory_backends() 216 ), 217 runtime_info = list( 218 simd_level = runtimeinfo[1], 219 detected_simd_level = runtimeinfo[2] 220 ), 221 build_info = list( 222 cpp_version = buildinfo[1], 223 cpp_compiler = buildinfo[2], 224 cpp_compiler_version = buildinfo[3], 225 cpp_compiler_flags = buildinfo[4], 226 # git_id is "" if not built from a git checkout 227 # convert that to NULL 228 git_id = if (nzchar(buildinfo[5])) buildinfo[5] 229 ) 230 )) 231 } 232 structure(out, class = "arrow_info") 233} 234 235some_features_are_off <- function(features) { 236 # `features` is a named logical vector (as in arrow_info()$capabilities) 237 # Let's exclude some less relevant ones 238 blocklist <- c("lzo", "bz2", "brotli") 239 # Return TRUE if any of the other features are FALSE 240 !all(features[setdiff(names(features), blocklist)]) 241} 242 243#' @export 244print.arrow_info <- function(x, ...) { 245 print_key_values <- function(title, vals, ...) { 246 # Make a key-value table for printing, no column names 247 df <- data.frame(vals, stringsAsFactors = FALSE, ...) 248 names(df) <- "" 249 250 cat(title, ":\n", sep = "") 251 print(df) 252 cat("\n") 253 } 254 cat("Arrow package version: ", format(x$version), "\n\n", sep = "") 255 if (x$libarrow) { 256 print_key_values("Capabilities", c( 257 x$capabilities, 258 jemalloc = "jemalloc" %in% x$memory_pool$available_backends, 259 mimalloc = "mimalloc" %in% x$memory_pool$available_backends 260 )) 261 if (some_features_are_off(x$capabilities) && identical(tolower(Sys.info()[["sysname"]]), "linux")) { 262 # Only on linux because (e.g.) we disable certain features on purpose on rtools35 and solaris 263 cat( 264 "To reinstall with more optional capabilities enabled, see\n", 265 " https://arrow.apache.org/docs/r/articles/install.html\n\n" 266 ) 267 } 268 269 if (length(x$options)) { 270 print_key_values("Arrow options()", map_chr(x$options, format)) 271 } 272 273 format_bytes <- function(b, units = "auto", digits = 2L, ...) { 274 format(structure(b, class = "object_size"), units = units, digits = digits, ...) 275 } 276 print_key_values("Memory", c( 277 Allocator = x$memory_pool$backend_name, 278 # utils:::format.object_size is not properly vectorized 279 Current = format_bytes(x$memory_pool$bytes_allocated, ...), 280 Max = format_bytes(x$memory_pool$max_memory, ...) 281 )) 282 print_key_values("Runtime", c( 283 `SIMD Level` = x$runtime_info$simd_level, 284 `Detected SIMD Level` = x$runtime_info$detected_simd_level 285 )) 286 print_key_values("Build", c( 287 `C++ Library Version` = x$build_info$cpp_version, 288 `C++ Compiler` = x$build_info$cpp_compiler, 289 `C++ Compiler Version` = x$build_info$cpp_compiler_version, 290 `Git ID` = x$build_info$git_id 291 )) 292 } else { 293 cat( 294 "Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html ", 295 "for troubleshooting.\n" 296 ) 297 } 298 invisible(x) 299} 300 301option_compress_metadata <- function() { 302 !is_false(getOption("arrow.compress_metadata")) 303} 304 305#' @include enums.R 306ArrowObject <- R6Class("ArrowObject", 307 public = list( 308 initialize = function(xp) self$set_pointer(xp), 309 pointer = function() get(".:xp:.", envir = self), 310 `.:xp:.` = NULL, 311 set_pointer = function(xp) { 312 if (!inherits(xp, "externalptr")) { 313 stop( 314 class(self)[1], "$new() requires a pointer as input: ", 315 "did you mean $create() instead?", 316 call. = FALSE 317 ) 318 } 319 assign(".:xp:.", xp, envir = self) 320 }, 321 print = function(...) { 322 if (!is.null(self$.class_title)) { 323 # Allow subclasses to override just printing the class name first 324 class_title <- self$.class_title() 325 } else { 326 class_title <- class(self)[[1]] 327 } 328 cat(class_title, "\n", sep = "") 329 if (!is.null(self$ToString)) { 330 cat(self$ToString(), "\n", sep = "") 331 } 332 invisible(self) 333 }, 334 invalidate = function() { 335 assign(".:xp:.", NULL, envir = self) 336 } 337 ) 338) 339 340#' @export 341`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs) # nolint 342 343#' @export 344`==.ArrowObject` <- function(x, y) { # nolint 345 x$Equals(y) 346} 347 348#' @export 349all.equal.ArrowObject <- function(target, current, ..., check.attributes = TRUE) { 350 target$Equals(current, check_metadata = check.attributes) 351} 352