1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
19#' @importFrom R6 R6Class
20#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr
21#' @importFrom assertthat assert_that is.string
22#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
23#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec
24#' @importFrom rlang is_bare_character quo_get_expr quo_get_env quo_set_expr .data seq2 is_interactive
25#' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure
26#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
27#' @useDynLib arrow, .registration = TRUE
28#' @keywords internal
29"_PACKAGE"
30
31#' @importFrom vctrs s3_register vec_size vec_cast vec_unique
32.onLoad <- function(...) {
33  dplyr_methods <- paste0(
34    "dplyr::",
35    c(
36      "select", "filter", "collect", "summarise", "group_by", "groups",
37      "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute",
38      "arrange", "rename", "pull", "relocate", "compute", "collapse",
39      "distinct", "left_join", "right_join", "inner_join", "full_join",
40      "semi_join", "anti_join", "count", "tally"
41    )
42  )
43  for (cl in c("Dataset", "ArrowTabular", "arrow_dplyr_query")) {
44    for (m in dplyr_methods) {
45      s3_register(m, cl)
46    }
47  }
48  s3_register("dplyr::tbl_vars", "arrow_dplyr_query")
49
50  for (cl in c(
51    "Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
52    "Field", "DataType", "RecordBatchReader"
53  )) {
54    s3_register("reticulate::py_to_r", paste0("pyarrow.lib.", cl))
55    s3_register("reticulate::r_to_py", cl)
56  }
57
58  # Create these once, at package build time
59  if (arrow_available()) {
60    # Also include all available Arrow Compute functions,
61    # namespaced as arrow_fun.
62    # We can't do this at install time because list_compute_functions() may error
63    all_arrow_funs <- list_compute_functions()
64    arrow_funcs <- set_names(
65      lapply(all_arrow_funs, function(fun) {
66        force(fun)
67        function(...) build_expr(fun, ...)
68      }),
69      paste0("arrow_", all_arrow_funs)
70    )
71    .cache$functions <- c(nse_funcs, arrow_funcs)
72  }
73
74  if (tolower(Sys.info()[["sysname"]]) == "windows") {
75    # Disable multithreading on Windows
76    # See https://issues.apache.org/jira/browse/ARROW-8379
77    options(arrow.use_threads = FALSE)
78  }
79
80  invisible()
81}
82
83.onAttach <- function(libname, pkgname) {
84  if (!arrow_available()) {
85    msg <- paste(
86      "The Arrow C++ library is not available. To retry installation with debug output, run:",
87      "    install_arrow(verbose = TRUE)",
88      "See https://arrow.apache.org/docs/r/articles/install.html for more guidance and troubleshooting.",
89      sep = "\n"
90    )
91    packageStartupMessage(msg)
92  } else {
93    # Just to be extra safe, let's wrap this in a try();
94    # we don't a failed startup message to prevent the package from loading
95    try({
96      features <- arrow_info()$capabilities
97      # That has all of the #ifdef features, plus the compression libs and the
98      # string libraries (but not the memory allocators, they're added elsewhere)
99      #
100      # Let's print a message if some are off
101      if (some_features_are_off(features)) {
102        packageStartupMessage("See arrow_info() for available features")
103      }
104    })
105  }
106}
107
108#' Is the C++ Arrow library available?
109#'
110#' You won't generally need to call these function, but they're made available
111#' for diagnostic purposes.
112#' @return `TRUE` or `FALSE` depending on whether the package was installed
113#' with:
114#' * The Arrow C++ library (check with `arrow_available()`)
115#' * Arrow Dataset support enabled (check with `arrow_with_dataset()`)
116#' * Parquet support enabled (check with `arrow_with_parquet()`)
117#' * JSON support enabled (check with `arrow_with_json()`)
118#' * Amazon S3 support enabled (check with `arrow_with_s3()`)
119#' @export
120#' @examples
121#' arrow_available()
122#' arrow_with_dataset()
123#' arrow_with_parquet()
124#' arrow_with_json()
125#' arrow_with_s3()
126#' @seealso If any of these are `FALSE`, see
127#' `vignette("install", package = "arrow")` for guidance on reinstalling the
128#' package.
129arrow_available <- function() {
130  tryCatch(.Call(`_arrow_available`), error = function(e) {
131    return(FALSE)
132  })
133}
134
135#' @rdname arrow_available
136#' @export
137arrow_with_dataset <- function() {
138  is_32bit <- .Machine$sizeof.pointer < 8
139  is_old_r <- getRversion() < "4.0.0"
140  is_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
141  if (is_32bit && is_old_r && is_windows) {
142    # 32-bit rtools 3.5 does not properly implement the std::thread expectations
143    # but we can't just disable ARROW_DATASET in that build,
144    # so report it as "off" here.
145    return(FALSE)
146  }
147  tryCatch(.Call(`_dataset_available`), error = function(e) {
148    return(FALSE)
149  })
150}
151
152#' @rdname arrow_available
153#' @export
154arrow_with_parquet <- function() {
155  tryCatch(.Call(`_parquet_available`), error = function(e) {
156    return(FALSE)
157  })
158}
159
160#' @rdname arrow_available
161#' @export
162arrow_with_s3 <- function() {
163  tryCatch(.Call(`_s3_available`), error = function(e) {
164    return(FALSE)
165  })
166}
167
168#' @rdname arrow_available
169#' @export
170arrow_with_json <- function() {
171  tryCatch(.Call(`_json_available`), error = function(e) {
172    return(FALSE)
173  })
174}
175
176option_use_threads <- function() {
177  !is_false(getOption("arrow.use_threads"))
178}
179
180#' Report information on the package's capabilities
181#'
182#' This function summarizes a number of build-time configurations and run-time
183#' settings for the Arrow package. It may be useful for diagnostics.
184#' @return A list including version information, boolean "capabilities", and
185#' statistics from Arrow's memory allocator, and also Arrow's run-time
186#' information.
187#' @export
188#' @importFrom utils packageVersion
189arrow_info <- function() {
190  opts <- options()
191  out <- list(
192    version = packageVersion("arrow"),
193    libarrow = arrow_available(),
194    options = opts[grep("^arrow\\.", names(opts))]
195  )
196  if (out$libarrow) {
197    pool <- default_memory_pool()
198    runtimeinfo <- runtime_info()
199    buildinfo <- build_info()
200    compute_funcs <- list_compute_functions()
201    out <- c(out, list(
202      capabilities = c(
203        dataset = arrow_with_dataset(),
204        parquet = arrow_with_parquet(),
205        json = arrow_with_json(),
206        s3 = arrow_with_s3(),
207        utf8proc = "utf8_upper" %in% compute_funcs,
208        re2 = "replace_substring_regex" %in% compute_funcs,
209        vapply(tolower(names(CompressionType)[-1]), codec_is_available, logical(1))
210      ),
211      memory_pool = list(
212        backend_name = pool$backend_name,
213        bytes_allocated = pool$bytes_allocated,
214        max_memory = pool$max_memory,
215        available_backends = supported_memory_backends()
216      ),
217      runtime_info = list(
218        simd_level = runtimeinfo[1],
219        detected_simd_level = runtimeinfo[2]
220      ),
221      build_info = list(
222        cpp_version = buildinfo[1],
223        cpp_compiler = buildinfo[2],
224        cpp_compiler_version = buildinfo[3],
225        cpp_compiler_flags = buildinfo[4],
226        # git_id is "" if not built from a git checkout
227        # convert that to NULL
228        git_id = if (nzchar(buildinfo[5])) buildinfo[5]
229      )
230    ))
231  }
232  structure(out, class = "arrow_info")
233}
234
235some_features_are_off <- function(features) {
236  # `features` is a named logical vector (as in arrow_info()$capabilities)
237  # Let's exclude some less relevant ones
238  blocklist <- c("lzo", "bz2", "brotli")
239  # Return TRUE if any of the other features are FALSE
240  !all(features[setdiff(names(features), blocklist)])
241}
242
243#' @export
244print.arrow_info <- function(x, ...) {
245  print_key_values <- function(title, vals, ...) {
246    # Make a key-value table for printing, no column names
247    df <- data.frame(vals, stringsAsFactors = FALSE, ...)
248    names(df) <- ""
249
250    cat(title, ":\n", sep = "")
251    print(df)
252    cat("\n")
253  }
254  cat("Arrow package version: ", format(x$version), "\n\n", sep = "")
255  if (x$libarrow) {
256    print_key_values("Capabilities", c(
257      x$capabilities,
258      jemalloc = "jemalloc" %in% x$memory_pool$available_backends,
259      mimalloc = "mimalloc" %in% x$memory_pool$available_backends
260    ))
261    if (some_features_are_off(x$capabilities) && identical(tolower(Sys.info()[["sysname"]]), "linux")) {
262      # Only on linux because (e.g.) we disable certain features on purpose on rtools35 and solaris
263      cat(
264        "To reinstall with more optional capabilities enabled, see\n",
265        "  https://arrow.apache.org/docs/r/articles/install.html\n\n"
266      )
267    }
268
269    if (length(x$options)) {
270      print_key_values("Arrow options()", map_chr(x$options, format))
271    }
272
273    format_bytes <- function(b, units = "auto", digits = 2L, ...) {
274      format(structure(b, class = "object_size"), units = units, digits = digits, ...)
275    }
276    print_key_values("Memory", c(
277      Allocator = x$memory_pool$backend_name,
278      # utils:::format.object_size is not properly vectorized
279      Current = format_bytes(x$memory_pool$bytes_allocated, ...),
280      Max = format_bytes(x$memory_pool$max_memory, ...)
281    ))
282    print_key_values("Runtime", c(
283      `SIMD Level` = x$runtime_info$simd_level,
284      `Detected SIMD Level` = x$runtime_info$detected_simd_level
285    ))
286    print_key_values("Build", c(
287      `C++ Library Version` = x$build_info$cpp_version,
288      `C++ Compiler` = x$build_info$cpp_compiler,
289      `C++ Compiler Version` = x$build_info$cpp_compiler_version,
290      `Git ID` = x$build_info$git_id
291    ))
292  } else {
293    cat(
294      "Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html ",
295      "for troubleshooting.\n"
296    )
297  }
298  invisible(x)
299}
300
301option_compress_metadata <- function() {
302  !is_false(getOption("arrow.compress_metadata"))
303}
304
305#' @include enums.R
306ArrowObject <- R6Class("ArrowObject",
307  public = list(
308    initialize = function(xp) self$set_pointer(xp),
309    pointer = function() get(".:xp:.", envir = self),
310    `.:xp:.` = NULL,
311    set_pointer = function(xp) {
312      if (!inherits(xp, "externalptr")) {
313        stop(
314          class(self)[1], "$new() requires a pointer as input: ",
315          "did you mean $create() instead?",
316          call. = FALSE
317        )
318      }
319      assign(".:xp:.", xp, envir = self)
320    },
321    print = function(...) {
322      if (!is.null(self$.class_title)) {
323        # Allow subclasses to override just printing the class name first
324        class_title <- self$.class_title()
325      } else {
326        class_title <- class(self)[[1]]
327      }
328      cat(class_title, "\n", sep = "")
329      if (!is.null(self$ToString)) {
330        cat(self$ToString(), "\n", sep = "")
331      }
332      invisible(self)
333    },
334    invalidate = function() {
335      assign(".:xp:.", NULL, envir = self)
336    }
337  )
338)
339
340#' @export
341`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs) # nolint
342
343#' @export
344`==.ArrowObject` <- function(x, y) { # nolint
345  x$Equals(y)
346}
347
348#' @export
349all.equal.ArrowObject <- function(target, current, ..., check.attributes = TRUE) {
350  target$Equals(current, check_metadata = check.attributes)
351}
352