1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
# The first trailing command-line argument is the Arrow version to work with
args <- commandArgs(TRUE)
VERSION <- args[1]
# Destination for the C++ headers/libraries, relative to the working directory
dst_dir <- paste0("libarrow/arrow-", VERSION)

# Base URL under which prebuilt binaries ("bin/") and sources ("src/") are looked up
arrow_repo <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/"

# Temporary directories created by the functions below are appended to this
# option so that they can all be removed in one place
options(.arrow.cleanup = character())
# recursive = TRUE is required here: without it unlink() refuses to delete
# directories (even empty ones), and directories are what gets collected.
# NOTE(review): on.exit() only fires when evaluated inside a function call;
# confirm how this script is run if the cleanup is relied upon.
on.exit(unlink(getOption(".arrow.cleanup"), recursive = TRUE), add = TRUE)
26
# Case-insensitive test of whether environment variable `var` equals `value`.
# `value` must be given in lower case; an unset variable reads as "".
env_is <- function(var, value) {
  current <- tolower(Sys.getenv(var))
  identical(current, value)
}

# How the switches below combine:
# * download disabled, build allowed: only build from a local git checkout
# * download allowed, build disabled: only use a prebuilt binary, if found
# * both disabled: get the arrow-without-arrow package
# Downloading and building are allowed unless explicitly set to "false"
download_ok <- !env_is("LIBARROW_DOWNLOAD", "false")
build_ok <- !env_is("LIBARROW_BUILD", "false")
# Prebuilt binaries, however, are opt-in: anything other than "false" enables them
binary_ok <- tolower(Sys.getenv("LIBARROW_BINARY", "false")) != "false"
# Set ARROW_R_DEV=TRUE while debugging locally to get more verbose output
quietly <- !env_is("ARROW_R_DEV", "true")
38
# Fetch `from_url` into `to_file` without ever raising an error.
# Returns TRUE when download.file() completed with status 0, FALSE otherwise.
# Output is suppressed when the global `quietly` flag is TRUE.
try_download <- function(from_url, to_file) {
  status <- try(
    suppressWarnings(download.file(from_url, to_file, quiet = quietly)),
    silent = quietly
  )
  if (inherits(status, "try-error")) {
    # download.file() signalled an error
    return(FALSE)
  }
  status == 0
}
49
# Try to fetch a prebuilt C++ binary archive for `os`.
# Returns the path of the downloaded zip file, or NULL when `os` is NULL or
# no archive could be downloaded for it.
download_binary <- function(os = identify_os()) {
  libfile <- tempfile()
  if (is.null(os)) {
    return(NULL)
  }

  # Map this os-version onto one that binaries are published for
  os <- find_available_binary(os)
  binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
  found <- try_download(binary_url, libfile)
  if (found) {
    cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
    libfile
  } else {
    cat(sprintf("*** No C++ binaries found for %s\n", os))
    NULL
  }
}
67
# Decide which flavor of prebuilt binary to download, if any.
# The answer is driven by the env var LIBARROW_BINARY (falling back to
# LIBARROW_DOWNLOAD when that is unset):
# * empty or "FALSE" (not case-sensitive): no binary, returns NULL
# * "TRUE" (not case-sensitive): detect the current OS via distro()
# * any other string: taken as-is, presumably a "distro-version" whose
#   binaries are known to work on this machine
identify_os <- function(os = Sys.getenv("LIBARROW_BINARY", Sys.getenv("LIBARROW_DOWNLOAD"))) {
  requested <- tolower(os)
  if (requested %in% c("", "false")) {
    # Told not to download a binary
    return(NULL)
  }
  if (!identical(requested, "true")) {
    # An explicit os-version was supplied (e.g. you run Ubuntu 18.10 but want
    # the 18.04 build), so respect it
    return(os)
  }

  detected <- distro()
  if (is.null(detected)) {
    cat("*** Unable to identify current OS/version\n")
    return(NULL)
  }
  paste(detected$id, detected$short_version, sep = "-")
}
91
92#### start distro ####
93
# Identify the current Linux distribution.
# Tries lsb_release(), then os_release(), then system_release() and uses the
# first one that yields a result. Returns NULL when none does; otherwise a
# list with `id` (lower-cased), `version`, `codename` and a derived
# `short_version` ("major.minor" for ubuntu, "major" for everything else).
distro <- function() {
  out <- lsb_release()
  if (is.null(out)) {
    out <- os_release()
  }
  if (is.null(out)) {
    out <- system_release()
  }
  if (is.null(out)) {
    return(NULL)
  }

  out$id <- tolower(out$id)
  # any() and identical() keep these checks safe when `codename` or `id` is
  # missing (zero-length), where a bare if () would raise an error
  if (any(grepl("bullseye", out$codename))) {
    # debian unstable doesn't include a number but we can map from pretty name
    out$short_version <- "11"
  } else if (identical(out$id, "ubuntu")) {
    # Keep major.minor version
    out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
  } else {
    # Only major version number
    out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
  }
  out
}
119
# Query the `lsb_release` command line tool for distribution details.
# Returns NULL when the tool is not available, otherwise a list holding the
# output of `lsb_release -is` (id), `-rs` (version) and `-cs` (codename).
lsb_release <- function() {
  if (!have_lsb_release()) {
    return(NULL)
  }
  list(
    id = call_lsb("-is"),
    version = call_lsb("-rs"),
    codename = call_lsb("-cs")
  )
}
131
# TRUE when an `lsb_release` executable can be located via Sys.which()
have_lsb_release <- function() {
  lsb_path <- Sys.which("lsb_release")
  nzchar(lsb_path)
}
# Run `lsb_release <args>` and return what it printed, one element per line
call_lsb <- function(args) {
  cmd <- paste("lsb_release", args)
  system(cmd, intern = TRUE)
}
134
# Extract distribution details from the lines returned by read_os_release().
# Returns NULL when no data is available, otherwise a list with `id` (ID),
# `version` (VERSION_ID) and `codename` (VERSION_CODENAME, falling back to
# PRETTY_NAME). Fields that are absent from the data come back as NULL.
os_release <- function() {
  rel_data <- read_os_release()
  if (is.null(rel_data)) {
    return(NULL)
  }

  # Only KEY=value lines carry data; this drops blank lines and comments
  rel_data <- grep("^[A-Za-z_][A-Za-z0-9_]*=", rel_data, value = TRUE)
  # Split at the FIRST "=" so that values containing "=" stay intact
  keys <- sub("=.*$", "", rel_data)
  vals <- sub("^[^=]*=", "", rel_data)
  # Values may be wrapped in either double or single quotes
  vals <- as.list(gsub("^[\"']|[\"']$", "", vals))
  names(vals) <- keys

  out <- list(
    id = vals[["ID"]],
    version = vals[["VERSION_ID"]]
  )
  if ("VERSION_CODENAME" %in% names(vals)) {
    out$codename <- vals[["VERSION_CODENAME"]]
  } else {
    # This probably isn't right, maybe could extract codename from pretty name?
    out$codename <- vals[["PRETTY_NAME"]]
  }
  out
}
156
# Return the lines of /etc/os-release, or NULL (invisibly) if there is no such file
read_os_release <- function() {
  os_release_path <- "/etc/os-release"
  if (file.exists(os_release_path)) {
    readLines(os_release_path)
  }
}
162
# Extract distribution details from the line returned by read_system_release().
# Returns NULL when no data is available, otherwise a list with `id` (the
# leading word), `version` (the dotted number) and `codename` (always NA).
system_release <- function() {
  rel_data <- read_system_release()
  if (is.null(rel_data)) {
    return(NULL)
  }
  # Matches something like "CentOS Linux release 7.7.1908 (Core)"
  release_pattern <- "^([a-zA-Z]+) .* ([0-9.]+).*$"
  list(
    id = sub(release_pattern, "\\1", rel_data),
    version = sub(release_pattern, "\\2", rel_data),
    codename = NA
  )
}
176
# Return the first line of `path` (by default /etc/system-release), or NULL
# when the file does not exist or is empty. The NULL result lets
# system_release() report "no data" instead of failing inside readLines().
read_system_release <- function(path = "/etc/system-release") {
  if (!file.exists(path)) {
    return(NULL)
  }
  first_line <- utils::head(readLines(path), 1)
  if (length(first_line) == 0) {
    return(NULL)
  }
  first_line
}
178
179#### end distro ####
180
# Translate `os` into the os-version whose binaries should be used for it.
# The mapping comes from a remote csv with columns "actual" and "use_this";
# `os` is returned unchanged when the csv cannot be read or has no single,
# non-missing entry for it.
find_available_binary <- function(os) {
  map_url <- "https://raw.githubusercontent.com/ursa-labs/arrow-r-nightly/master/linux/distro-map.csv"
  lookup <- try(utils::read.csv(map_url, stringsAsFactors = FALSE), silent = quietly)
  if (inherits(lookup, "try-error") || !(os %in% lookup$actual)) {
    return(os)
  }

  new <- lookup$use_this[lookup$actual == os]
  # Just some sanity checking: only accept a single, non-missing replacement
  if (length(new) == 1 && !is.na(new)) {
    cat(sprintf("*** Using %s binary for %s\n", new, os))
    return(new)
  }
  os
}
194
# Download and unpack the C++ source for VERSION.
# Tries bintray_download() first and apache_download() second. Returns the
# path of the extracted "cpp" directory, or NULL if nothing usable was
# obtained. The extraction directory is appended to the ".arrow.cleanup"
# option.
download_source <- function() {
  tf1 <- tempfile()
  extract_dir <- tempfile()
  # The archive is only needed until it is unpacked; also drops any partial
  # file left behind when every download attempt fails
  on.exit(unlink(tf1), add = TRUE)

  src_dir <- NULL
  if (bintray_download(tf1)) {
    # First try from bintray
    cat("*** Successfully retrieved C++ source\n")
    unzip(tf1, exdir = extract_dir)
    src_dir <- paste0(extract_dir, "/cpp")
  } else if (apache_download(tf1)) {
    # If that fails, try for an official release
    cat("*** Successfully retrieved C++ source\n")
    untar(tf1, exdir = extract_dir)
    src_dir <- paste0(extract_dir, "/apache-arrow-", VERSION, "/cpp")
  }

  if (dir.exists(extract_dir)) {
    # Register the whole extraction root, not just its cpp/ subdirectory, so
    # that nothing of the unpacked archive is left behind
    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), extract_dir))
  }
  if (is.null(src_dir) || !dir.exists(src_dir)) {
    return(NULL)
  }

  # These scripts need to be executable. The directory is quoted so that a
  # temp path containing spaces survives the shell; the glob stays unquoted.
  system(
    sprintf("chmod 755 %s/build-support/*.sh", shQuote(src_dir)),
    ignore.stdout = quietly, ignore.stderr = quietly
  )
  src_dir
}
224
# Try to download the source zip for VERSION from `arrow_repo` into `destfile`.
# Returns whatever try_download() reports (TRUE on success).
bintray_download <- function(destfile) {
  zip_name <- paste0("arrow-", VERSION, ".zip")
  try_download(paste0(arrow_repo, "src/", zip_name), destfile)
}
229
# Try to download the official release tarball for VERSION into `destfile`.
# The mirror-selector URL is attempted `n_mirrors` times, then the
# downloads.apache.org backup. Returns TRUE as soon as one attempt
# succeeds and FALSE if none does.
apache_download <- function(destfile, n_mirrors = 3) {
  apache_path <- paste0("arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz")
  # This returns a different mirror each time
  mirror_picker <- "https://www.apache.org/dyn/closer.lua?action=download&filename="
  # The backup
  fallback <- "https://downloads.apache.org/"

  for (base_url in c(rep(mirror_picker, n_mirrors), fallback)) {
    if (try_download(paste0(base_url, apache_path), destfile)) {
      return(TRUE)
    }
  }
  FALSE
}
246
# Look for the C++ source in a local checkout rooted at `arrow_home`.
# Returns "<arrow_home>/cpp" when cpp/src/arrow/api.h exists there, else NULL.
find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
  marker <- paste0(arrow_home, "/cpp/src/arrow/api.h")
  if (!file.exists(marker)) {
    return(NULL)
  }
  # We're in a git checkout of arrow, so we can build it
  cat("*** Found local C++ source\n")
  paste0(arrow_home, "/cpp")
}
256
# Build the C++ library from `src_dir` and install it into `dst_dir` by
# running inst/build_arrow_static.sh with SOURCE_DIR, BUILD_DIR, DEST_DIR and
# CMAKE set. Sets MAKEFLAGS if it is empty and appends the temporary build
# directory to the ".arrow.cleanup" option.
# Returns (invisibly) the exit status of the build script; 0 means success.
build_libarrow <- function(src_dir, dst_dir) {
  # We'll need to compile R bindings with these libs, so delete any .o files
  system("rm src/*.o", ignore.stdout = quietly, ignore.stderr = quietly)
  # Set up make for parallel building
  makeflags <- Sys.getenv("MAKEFLAGS")
  if (makeflags == "") {
    # CRAN policy says not to use more than 2 cores during checks
    # If you have more and want to use more, set MAKEFLAGS
    # detectCores() may return NA; na.rm keeps that from producing "-jNA"
    ncores <- min(parallel::detectCores(), 2, na.rm = TRUE)
    makeflags <- sprintf("-j%s", ncores)
    Sys.setenv(MAKEFLAGS = makeflags)
  }
  if (!quietly) {
    cat("*** Building with MAKEFLAGS=", makeflags, "\n")
  }
  # Check for libarrow build dependencies:
  # * cmake
  cmake <- ensure_cmake()

  build_dir <- tempfile()
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir))
  # Paths are shell-quoted so that spaces in them do not split the assignments
  env_vars <- sprintf(
    "SOURCE_DIR=%s BUILD_DIR=%s DEST_DIR=%s CMAKE=%s",
    shQuote(src_dir), shQuote(build_dir), shQuote(dst_dir), shQuote(cmake)
  )
  cat("**** arrow", if (quietly) "" else paste("with", env_vars), "\n")
  status <- system(
    paste(env_vars, "inst/build_arrow_static.sh"),
    ignore.stdout = quietly, ignore.stderr = quietly
  )
  if (status != 0) {
    # In quiet mode the script's own output is hidden, so say that it failed
    cat("**** Error building Arrow C++. Re-run with ARROW_R_DEV=true for debug information.\n")
  }
  invisible(status)
}
288
# Return the path of a cmake executable.
# Uses the cmake found by Sys.which() when there is one. Otherwise downloads
# the Linux x86_64 release tarball of CMAKE_VERSION (env var, default
# "3.16.2") into a temporary directory, appends that directory to the
# ".arrow.cleanup" option, and returns the path of the cmake binary inside
# it. If the download fails a message is printed and the returned path will
# not exist.
ensure_cmake <- function() {
  cmake <- Sys.which("cmake")
  # TODO: should check that cmake is of sufficient version
  if (nzchar(cmake)) {
    return(cmake)
  }

  # If not found, download it
  cat("**** cmake\n")
  CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.16.2")
  cmake_release <- paste0("cmake-", CMAKE_VERSION, "-Linux-x86_64")
  cmake_binary_url <- paste0(
    "https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
    "/", cmake_release, ".tar.gz"
  )
  cmake_tar <- tempfile()
  cmake_dir <- tempfile()
  if (try_download(cmake_binary_url, cmake_tar)) {
    untar(cmake_tar, exdir = cmake_dir)
  } else {
    # Do not hand a missing file to untar(); report the real problem instead
    cat("**** cmake was not found locally and could not be downloaded\n")
  }
  unlink(cmake_tar)
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
  paste0(cmake_dir, "/", cmake_release, "/bin/cmake")
}
314
315#####
316
# Nothing to do when the headers are already in place (e.g. a local checkout
# where the libraries were built before). Otherwise obtain them:
if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) {
  bin_file <- src_dir <- NULL
  # (1) Look for a prebuilt binary for this version
  if (download_ok && binary_ok) {
    bin_file <- download_binary()
  }

  if (is.null(bin_file) && !build_ok) {
    # No binary, and building is not allowed
    cat("*** Proceeding without C++ dependencies\n")
  } else if (is.null(bin_file)) {
    # (2) Find source and build it: downloaded source takes precedence over a
    # local checkout
    if (download_ok) {
      src_dir <- download_source()
    }
    if (is.null(src_dir)) {
      src_dir <- find_local_source()
    }
    if (is.null(src_dir)) {
      cat("*** Proceeding without C++ dependencies\n")
    } else {
      cat("*** Building C++ libraries\n")
      build_libarrow(src_dir, dst_dir)
    }
  } else {
    # A binary archive was downloaded: extract it into place
    dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
    unzip(bin_file, exdir = dst_dir)
    unlink(bin_file)
  }
}
348