# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

#' Install or upgrade the Arrow library
#'
#' Use this function to install the latest release of `arrow`, to switch to or
#' from a nightly development version, or on Linux to try reinstalling with
#' all necessary C++ dependencies.
#'
#' Note that, unlike packages like `tensorflow`, `blogdown`, and others that
#' require external dependencies, you do not need to run `install_arrow()`
#' after a successful `arrow` installation.
#'
#' @param nightly logical: Should we install a development version of the
#' package, or should we install from CRAN (the default).
#' @param binary On Linux, value to set for the environment variable
#' `LIBARROW_BINARY`, which governs how C++ binaries are used, if at all.
#' The default value, `TRUE`, tells the installation script to detect the
#' Linux distribution and version and find an appropriate C++ library. `FALSE`
#' would tell the script not to retrieve a binary and instead build Arrow C++
#' from source. Other valid values are strings corresponding to a Linux
#' distribution-version, to override the value that would be detected.
#' See `vignette("install", package = "arrow")` for further details.
#' @param use_system logical: Should we use `pkg-config` to look for Arrow
#' system packages? Default is `FALSE`. If `TRUE`, source installation may be
#' faster, but there is a risk of version mismatch. This sets the
#' `ARROW_USE_PKG_CONFIG` environment variable.
#' @param minimal logical: If building from source, should we build without
#' optional dependencies (compression libraries, for example)? Default is
#' `FALSE`. This sets the `LIBARROW_MINIMAL` environment variable.
#' @param verbose logical: Print more debugging output when installing? Default
#' is `FALSE`. This sets the `ARROW_R_DEV` environment variable.
#' @param repos character vector of base URLs of the repositories to install
#' from (passed to `install.packages()`)
#' @param ... Additional arguments passed to `install.packages()`
#' @export
#' @importFrom utils install.packages
#' @seealso [arrow_available()] to see if the package was configured with
#' necessary C++ dependencies. `vignette("install", package = "arrow")` for
#' more ways to tune installation on Linux.
install_arrow <- function(nightly = FALSE,
                          binary = Sys.getenv("LIBARROW_BINARY", TRUE),
                          use_system = Sys.getenv("ARROW_USE_PKG_CONFIG", FALSE),
                          minimal = Sys.getenv("LIBARROW_MINIMAL", FALSE),
                          verbose = Sys.getenv("ARROW_R_DEV", FALSE),
                          repos = getOption("repos"),
                          ...) {
  sysname <- tolower(Sys.info()[["sysname"]])
  conda <- isTRUE(grepl("conda", R.Version()$platform))

  if (conda) {
    if (nightly) {
      system("conda install -y -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow")
    } else {
      system("conda install -y -c conda-forge --strict-channel-priority r-arrow")
    }
  } else {
    # These are read by the package's install scripts; they are set for the
    # rest of the session, not restored afterwards.
    Sys.setenv(
      LIBARROW_BINARY = binary,
      LIBARROW_MINIMAL = minimal,
      ARROW_R_DEV = verbose,
      ARROW_USE_PKG_CONFIG = use_system
    )
    # On the M1, we can't use the usual autobrew, which pulls Intel dependencies
    apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
    # On Rosetta, we have to build without JEMALLOC, so we also can't autobrew.
    # The sysctl key does not exist on every Mac, so silence stderr and the
    # non-zero-exit warning; a failed query simply means "not Rosetta".
    rosetta <- identical(sysname, "darwin") &&
      identical(
        suppressWarnings(
          system(
            "sysctl -n sysctl.proc_translated",
            intern = TRUE,
            ignore.stderr = TRUE
          )
        ),
        "1"
      )
    if (rosetta) {
      Sys.setenv(ARROW_JEMALLOC = "OFF")
    }
    if (apple_m1 || rosetta) {
      Sys.setenv(FORCE_BUNDLED_BUILD = "true")
    }

    opts <- list()
    if (apple_m1 || rosetta) {
      # Skip binaries (esp. for rosetta)
      opts$pkgType <- "source"
    } else if (is_truthy_setting(binary)) {
      # Unless otherwise directed, don't consider newer source packages when
      # options(pkgType) == "both" (default on win/mac).
      # `binary` is usually the *string* "TRUE" here because Sys.getenv()
      # returns character, hence is_truthy_setting() rather than isTRUE().
      opts$install.packages.check.source <- "no"
      opts$install.packages.compile.from.source <- "never"
    }
    if (length(opts) > 0) {
      old <- options(opts)
      on.exit(options(old), add = TRUE)
    }
    install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
  }
  if ("arrow" %in% loadedNamespaces()) {
    # If you've just sourced this file, "arrow" won't be (re)loaded
    reload_arrow()
  }
}

# Interpret an install setting that may be a logical or the character value
# returned by Sys.getenv() ("TRUE", "true", "FALSE", ...). Anything that does
# not parse as a single TRUE (e.g. a distro string like "ubuntu-18.04") is
# treated as not-TRUE.
is_truthy_setting <- function(x) {
  isTRUE(as.logical(x))
}

# Build the vector of repository URLs to install from: fall back to the CRAN
# cloud mirror when no repos are configured, and put the nightly repository
# first only when `nightly` is TRUE.
arrow_repos <- function(repos = getOption("repos"), nightly = FALSE) {
  if (length(repos) == 0 || identical(repos, c(CRAN = "@CRAN@"))) {
    # Set the default/CDN
    repos <- "https://cloud.r-project.org/"
  }
  dev_repo <- getOption("arrow.dev_repo", "https://arrow-r-nightly.s3.amazonaws.com")
  # Remove it if it's there (so nightly=FALSE won't accidentally pull from it)
  repos <- setdiff(repos, dev_repo)
  if (nightly) {
    # Add it first
    repos <- c(dev_repo, repos)
  }
  repos
}

# Unload and reload the arrow namespace after an install so the new version is
# picked up without restarting R. Requires the optional `pkgload` package;
# otherwise the user is asked to restart.
reload_arrow <- function() {
  if (requireNamespace("pkgload", quietly = TRUE)) {
    is_attached <- "package:arrow" %in% search()
    pkgload::unload("arrow")
    if (is_attached) {
      # Re-attach only if it was attached before
      require("arrow", character.only = TRUE, quietly = TRUE)
    } else {
      requireNamespace("arrow", quietly = TRUE)
    }
  } else {
    message("Please restart R to use the 'arrow' package.")
  }
}


#' Create a source bundle that includes all thirdparty dependencies
#'
#' @param dest_file File path for the new tar.gz package. Defaults to
#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the version)
#' @param source_file File path for the input tar.gz package. Defaults to
#' downloading the package from CRAN (or whatever you have set as the first in
#' `getOption("repos")`)
#' @return The full path to `dest_file`, invisibly
#'
#' @details
#' This function is used for setting up an offline build. If it's possible to
#' download at build time, don't use this function. Instead, let `cmake`
#' download the required dependencies for you.
#' These downloaded dependencies are only used in the build if
#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
#'
#' If you're using binary packages you shouldn't need to use this function. You
#' should download the appropriate binary from your package repository, transfer
#' that to the offline computer, and install that. Any OS can create the source
#' bundle, but it cannot be installed on Windows. (Instead, use a standard
#' Windows binary package.)
#'
#' Note if you're using RStudio Package Manager on Linux: If you still want to
#' make a source bundle with this function, make sure to set the first repo in
#' `options("repos")` to be a mirror that contains source packages (that is:
#' something other than the RSPM binary mirror URLs).
#'
#' ## Steps for an offline install with optional dependencies:
#'
#' ### Using a computer with internet access, pre-download the dependencies:
#' * Install the `arrow` package _or_ run
#'   `source("https://raw.githubusercontent.com/apache/arrow/master/r/R/install-arrow.R")`
#' * Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
#' * Copy the newly created `my_arrow_pkg.tar.gz` to the computer without internet access
#'
#' ### On the computer without internet access, install the prepared package:
#' * Install the `arrow` package from the copied file
#'   * `install.packages("my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo"))`
#'   * This installation will build from source, so `cmake` must be available
#' * Run [arrow_info()] to check installed capabilities
#'
#'
#' @examples
#' \dontrun{
#' new_pkg <- create_package_with_all_dependencies()
#' # Note: this works when run in the same R session, but it's meant to be
#' # copied to a different computer.
#' install.packages(new_pkg, dependencies = c("Depends", "Imports", "LinkingTo"))
#' }
#' @export
create_package_with_all_dependencies <- function(dest_file = NULL, source_file = NULL) {
  if (is.null(source_file)) {
    pkg_download_dir <- tempfile()
    dir.create(pkg_download_dir)
    on.exit(unlink(pkg_download_dir, recursive = TRUE), add = TRUE)
    message("Downloading Arrow source file")
    downloaded <- utils::download.packages("arrow", destdir = pkg_download_dir, type = "source")
    # download.packages() returns a zero-row matrix when nothing was fetched;
    # fail clearly instead of with a subscript error.
    if (nrow(downloaded) < 1) {
      stop("Failed to download the arrow source package")
    }
    source_file <- downloaded[1, 2, drop = TRUE]
  }
  if (!file.exists(source_file) || !endsWith(source_file, "tar.gz")) {
    stop("Arrow package .tar.gz file not found")
  }
  if (is.null(dest_file)) {
    # e.g. convert /path/to/arrow_5.0.0.tar.gz to ./arrow_5.0.0_with_deps.tar.gz
    # (add 'with_deps' for clarity if the file was downloaded locally)
    dest_file <- paste0(sub("\\.tar\\.gz$", "", basename(source_file)), "_with_deps.tar.gz")
  }

  # Register the working-directory restore BEFORE the untar_dir cleanup.
  # on.exit(add = TRUE) handlers run in registration order, and we setwd() into
  # untar_dir below, so the directory must be left before it is removed
  # (removing the current working directory fails on Windows).
  orig_wd <- getwd()
  on.exit(setwd(orig_wd), add = TRUE)

  untar_dir <- tempfile()
  on.exit(unlink(untar_dir, recursive = TRUE), add = TRUE)
  utils::untar(source_file, exdir = untar_dir)
  tools_dir <- file.path(untar_dir, "arrow/tools")
  download_dependencies_sh <- file.path(tools_dir, "cpp/thirdparty/download_dependencies.sh")
  # If you change this path, also need to edit nixlibs.R
  download_dir <- file.path(tools_dir, "thirdparty_dependencies")
  dir.create(download_dir)

  message("Downloading files to ", download_dir)
  download_successful <- system2(download_dependencies_sh, download_dir, stdout = FALSE) == 0
  if (!download_successful) {
    stop("Failed to download thirdparty dependencies")
  }
  # Need to change directory to untar_dir so tar() will use relative paths. That
  # means we'll need a full, non-relative path for dest_file. (extra_flags="-C"
  # doesn't work with R's internal tar)
  # normalizePath() may return the input unchanged if dest_file doesn't exist,
  # so create it first.
  file.create(dest_file)
  dest_file <- normalizePath(dest_file, mustWork = TRUE)
  setwd(untar_dir)

  message("Repacking tar.gz file to ", dest_file)
  tar_successful <- utils::tar(dest_file, compression = "gz") == 0
  if (!tar_successful) {
    stop("Failed to create new tar.gz file")
  }
  invisible(dest_file)
}