1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18#' Install or upgrade the Arrow library
19#'
20#' Use this function to install the latest release of `arrow`, to switch to or
21#' from a nightly development version, or on Linux to try reinstalling with
22#' all necessary C++ dependencies.
23#'
24#' Note that, unlike packages like `tensorflow`, `blogdown`, and others that
25#' require external dependencies, you do not need to run `install_arrow()`
26#' after a successful `arrow` installation.
27#'
28#' @param nightly logical: Should we install a development version of the
29#' package, or should we install from CRAN (the default).
30#' @param binary On Linux, value to set for the environment variable
31#' `LIBARROW_BINARY`, which governs how C++ binaries are used, if at all.
32#' The default value, `TRUE`, tells the installation script to detect the
33#' Linux distribution and version and find an appropriate C++ library. `FALSE`
34#' would tell the script not to retrieve a binary and instead build Arrow C++
35#' from source. Other valid values are strings corresponding to a Linux
36#' distribution-version, to override the value that would be detected.
37#' See `vignette("install", package = "arrow")` for further details.
38#' @param use_system logical: Should we use `pkg-config` to look for Arrow
39#' system packages? Default is `FALSE`. If `TRUE`, source installation may be
40#' faster, but there is a risk of version mismatch. This sets the
41#' `ARROW_USE_PKG_CONFIG` environment variable.
42#' @param minimal logical: If building from source, should we build without
43#' optional dependencies (compression libraries, for example)? Default is
44#' `FALSE`. This sets the `LIBARROW_MINIMAL` environment variable.
45#' @param verbose logical: Print more debugging output when installing? Default
46#' is `FALSE`. This sets the `ARROW_R_DEV` environment variable.
47#' @param repos character vector of base URLs of the repositories to install
48#' from (passed to `install.packages()`)
49#' @param ... Additional arguments passed to `install.packages()`
50#' @export
51#' @importFrom utils install.packages
52#' @seealso [arrow_available()] to see if the package was configured with
53#' necessary C++ dependencies. `vignette("install", package = "arrow")` for
54#' more ways to tune installation on Linux.
install_arrow <- function(nightly = FALSE,
                          binary = Sys.getenv("LIBARROW_BINARY", TRUE),
                          use_system = Sys.getenv("ARROW_USE_PKG_CONFIG", FALSE),
                          minimal = Sys.getenv("LIBARROW_MINIMAL", FALSE),
                          verbose = Sys.getenv("ARROW_R_DEV", FALSE),
                          repos = getOption("repos"),
                          ...) {
  # Installs (or reinstalls) the arrow package. When R itself is a conda build
  # the work is delegated to `conda install`; otherwise the arguments are
  # exported as environment variables and install.packages() is called with
  # the repositories chosen by arrow_repos(). If arrow is already loaded in
  # this session, a reload is attempted afterwards.
  sysname <- tolower(Sys.info()[["sysname"]])
  conda <- isTRUE(grepl("conda", R.Version()$platform))

  if (conda) {
    if (nightly) {
      system("conda install -y -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow")
    } else {
      system("conda install -y -c conda-forge --strict-channel-priority r-arrow")
    }
  } else {
    # These variables are left set after the call returns.
    Sys.setenv(
      LIBARROW_BINARY = binary,
      LIBARROW_MINIMAL = minimal,
      ARROW_R_DEV = verbose,
      ARROW_USE_PKG_CONFIG = use_system
    )
    # On the M1, we can't use the usual autobrew, which pulls Intel dependencies
    apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
    # On Rosetta, we have to build without JEMALLOC, so we also can't autobrew.
    # The sysctl key may be missing (sysctl then exits non-zero), so silence
    # both the R warning and the command's stderr; a failed probe yields an
    # empty result, which is not identical to "1".
    rosetta <- identical(sysname, "darwin") &&
      identical(
        suppressWarnings(
          system(
            "sysctl -n sysctl.proc_translated",
            intern = TRUE,
            ignore.stderr = TRUE
          )
        ),
        "1"
      )
    if (rosetta) {
      Sys.setenv(ARROW_JEMALLOC = "OFF")
    }
    if (apple_m1 || rosetta) {
      Sys.setenv(FORCE_BUNDLED_BUILD = "true")
    }

    # The default for `binary` comes from Sys.getenv(), which always returns a
    # character string ("TRUE"), so isTRUE(binary) alone would never match the
    # default. Coerce first; distribution strings such as "ubuntu-18.04"
    # become NA and are therefore treated as "not plain TRUE".
    wants_binary <- isTRUE(as.logical(binary))

    opts <- list()
    if (apple_m1 || rosetta) {
      # Skip binaries (esp. for rosetta)
      opts$pkgType <- "source"
    } else if (wants_binary) {
      # Unless otherwise directed, don't consider newer source packages when
      # options(pkgType) == "both" (default on win/mac)
      opts$install.packages.check.source <- "no"
      opts$install.packages.compile.from.source <- "never"
    }
    if (length(opts) > 0) {
      # Restore the caller's options when this function exits
      old <- options(opts)
      on.exit(options(old), add = TRUE)
    }
    install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
  }
  if ("arrow" %in% loadedNamespaces()) {
    # If you've just sourced this file, "arrow" won't be (re)loaded
    reload_arrow()
  }
}
110
arrow_repos <- function(repos = getOption("repos"), nightly = FALSE) {
  # Builds the repository vector handed to install.packages(): the caller's
  # repos (or the CRAN cloud mirror when none are configured), with the
  # nightly repository placed first only when `nightly` is requested.
  not_configured <- length(repos) == 0 || identical(repos, c(CRAN = "@CRAN@"))
  base_repos <- if (not_configured) "https://cloud.r-project.org/" else repos

  nightly_repo <- getOption("arrow.dev_repo", "https://arrow-r-nightly.s3.amazonaws.com")
  # Always strip the nightly repo first, so a plain release install can never
  # pick it up by accident.
  base_repos <- setdiff(base_repos, nightly_repo)

  if (nightly) c(nightly_repo, base_repos) else base_repos
}
125
reload_arrow <- function() {
  # Tries to swap the freshly installed arrow into the running session. This
  # relies on the optional pkgload package; without it the user is asked to
  # restart R instead.
  if (!requireNamespace("pkgload", quietly = TRUE)) {
    message("Please restart R to use the 'arrow' package.")
  } else {
    # Capture whether arrow is attached before it is unloaded, so the session
    # ends up in the same state (attached vs. merely loaded) as before.
    was_on_search_path <- is.element("package:arrow", search())
    pkgload::unload("arrow")
    if (was_on_search_path) {
      require("arrow", character.only = TRUE, quietly = TRUE)
    } else {
      requireNamespace("arrow", quietly = TRUE)
    }
  }
}
139
140
141#' Create a source bundle that includes all thirdparty dependencies
142#'
143#' @param dest_file File path for the new tar.gz package. Defaults to
144#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the version)
145#' @param source_file File path for the input tar.gz package. Defaults to
146#' downloading the package from CRAN (or whatever you have set as the first in
147#' `getOption("repos")`)
148#' @return The full path to `dest_file`, invisibly
149#'
150#' This function is used for setting up an offline build. If it's possible to
151#' download at build time, don't use this function. Instead, let `cmake`
152#' download the required dependencies for you.
153#' These downloaded dependencies are only used in the build if
154#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
155#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
156#'
157#' If you're using binary packages you shouldn't need to use this function. You
158#' should download the appropriate binary from your package repository, transfer
159#' that to the offline computer, and install that. Any OS can create the source
160#' bundle, but it cannot be installed on Windows. (Instead, use a standard
161#' Windows binary package.)
162#'
163#' Note if you're using RStudio Package Manager on Linux: If you still want to
164#' make a source bundle with this function, make sure to set the first repo in
165#' `options("repos")` to be a mirror that contains source packages (that is:
166#' something other than the RSPM binary mirror URLs).
167#'
168#' ## Steps for an offline install with optional dependencies:
169#'
170#' ### Using a computer with internet access, pre-download the dependencies:
171#' * Install the `arrow` package _or_ run
172#'   `source("https://raw.githubusercontent.com/apache/arrow/master/r/R/install-arrow.R")`
173#' * Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
174#' * Copy the newly created `my_arrow_pkg.tar.gz` to the computer without internet access
175#'
176#' ### On the computer without internet access, install the prepared package:
177#' * Install the `arrow` package from the copied file
178#'   * `install.packages("my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo"))`
179#'   * This installation will build from source, so `cmake` must be available
180#' * Run [arrow_info()] to check installed capabilities
181#'
182#'
183#' @examples
184#' \dontrun{
185#' new_pkg <- create_package_with_all_dependencies()
186#' # Note: this works when run in the same R session, but it's meant to be
187#' # copied to a different computer.
188#' install.packages(new_pkg, dependencies = c("Depends", "Imports", "LinkingTo"))
189#' }
190#' @export
create_package_with_all_dependencies <- function(dest_file = NULL, source_file = NULL) {
  # Repackages an arrow source tarball so that it also contains the thirdparty
  # C++ dependency archives, for installation on a machine without internet.
  #
  # dest_file:   path of the tar.gz to write; NULL derives
  #              "<source name>_with_deps.tar.gz" in the working directory.
  # source_file: path of an existing arrow source tar.gz; NULL downloads the
  #              source package with utils::download.packages().
  # Returns the normalized path to dest_file, invisibly. Stops with an error
  # if the source is missing, the dependency download fails, or repacking
  # fails.
  if (is.null(source_file)) {
    pkg_download_dir <- tempfile()
    dir.create(pkg_download_dir)
    on.exit(unlink(pkg_download_dir, recursive = TRUE), add = TRUE)
    message("Downloading Arrow source file")
    downloaded <- utils::download.packages("arrow", destdir = pkg_download_dir, type = "source")
    # download.packages() only warns on failure and returns a matrix with no
    # rows; fail clearly here instead of with a subscript error below.
    if (NROW(downloaded) < 1) {
      stop("Failed to download the arrow source package")
    }
    source_file <- downloaded[1, 2, drop = TRUE]
  }
  if (!file.exists(source_file) || !endsWith(source_file, ".tar.gz")) {
    stop("Arrow package .tar.gz file not found")
  }
  if (is.null(dest_file)) {
    # e.g. convert /path/to/arrow_5.0.0.tar.gz to ./arrow_5.0.0_with_deps.tar.gz
    # (add 'with_deps' for clarity if the file was downloaded locally)
    dest_file <- paste0(sub("\\.tar\\.gz$", "", basename(source_file)), "_with_deps.tar.gz")
  }
  untar_dir <- tempfile()
  on.exit(unlink(untar_dir, recursive = TRUE), add = TRUE)
  utils::untar(source_file, exdir = untar_dir)
  tools_dir <- file.path(untar_dir, "arrow/tools")
  download_dependencies_sh <- file.path(tools_dir, "cpp/thirdparty/download_dependencies.sh")
  # If you change this path, also need to edit nixlibs.R
  download_dir <- file.path(tools_dir, "thirdparty_dependencies")
  dir.create(download_dir)

  message("Downloading files to ", download_dir)
  # system2() quotes the command itself but not its arguments, so quote the
  # directory in case the temp path contains spaces.
  download_successful <- system2(
    download_dependencies_sh,
    shQuote(download_dir),
    stdout = FALSE
  ) == 0
  if (!download_successful) {
    stop("Failed to download thirdparty dependencies")
  }
  # Need to change directory to untar_dir so tar() will use relative paths. That
  # means we'll need a full, non-relative path for dest_file. (extra_flags="-C"
  # doesn't work with R's internal tar)
  orig_wd <- getwd()
  on.exit(setwd(orig_wd), add = TRUE)
  # normalizePath() may return the input unchanged if dest_file doesn't exist,
  # so create it first.
  if (!file.create(dest_file)) {
    stop("Could not create ", dest_file)
  }
  dest_file <- normalizePath(dest_file, mustWork = TRUE)
  # Until repacking succeeds, dest_file is only an empty or partial
  # placeholder; remove it on any failure so it is not mistaken for a package.
  repack_done <- FALSE
  on.exit(if (!repack_done) unlink(dest_file), add = TRUE)
  setwd(untar_dir)

  message("Repacking tar.gz file to ", dest_file)
  tar_successful <- utils::tar(dest_file, compression = "gz") == 0
  if (!tar_successful) {
    stop("Failed to create new tar.gz file")
  }
  repack_done <- TRUE
  invisible(dest_file)
}
240