1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
# Script inputs: the first trailing command-line argument is the Arrow version;
# the C++ libraries for it live under libarrow/arrow-<VERSION>.
args <- commandArgs(TRUE)
VERSION <- args[1]
dst_dir <- paste0("libarrow/arrow-", VERSION)

# Base URL; "bin/<os>/arrow-<VERSION>.zip" and "src/arrow-<VERSION>.zip" are
# appended to it by the download helpers.
arrow_repo <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/"

# Compare against a version *string*: a bare double cannot represent versions
# faithfully (3.10 is the same number as 3.1) and has to be coerced to a
# version object before the comparison can happen.
if (getRversion() < "3.4" && is.null(getOption("download.file.method"))) {
  # default method doesn't work on R 3.3, nor does libcurl
  options(download.file.method = "wget")
}

options(.arrow.cleanup = character()) # To collect dirs to rm on exit
# NOTE(review): unlink() without recursive = TRUE does not remove directories;
# confirm whether the collected paths are really meant to be deleted here.
on.exit(unlink(getOption(".arrow.cleanup")))
31
# Case-insensitive check of an environment variable: TRUE only when the
# lower-cased value of `var` is exactly `value`.
env_is <- function(var, value) {
  current <- tolower(Sys.getenv(var))
  identical(current, value)
}
# How the switches below combine:
# * download off, build on:  only build from a local git checkout
# * download on,  build off: only use a prebuilt binary, if one is found
# * both off:                fall back to the arrow-without-arrow package
# Downloading and building are allowed unless explicitly set to "false"
download_ok <- !env_is("LIBARROW_DOWNLOAD", "false")
build_ok <- !env_is("LIBARROW_BUILD", "false")
# Prebuilt binaries are opt-in: an unset LIBARROW_BINARY reads as "false"
binary_ok <- tolower(Sys.getenv("LIBARROW_BINARY", unset = "false")) != "false"
# Set ARROW_R_DEV=true while debugging locally to get verbose output
quietly <- !env_is("ARROW_R_DEV", "true")
43
# Download `from_url` into `to_file` without ever raising an error.
# Returns TRUE when download.file() completed and reported status 0, otherwise
# FALSE. Warnings are suppressed; output verbosity follows `quietly`.
try_download <- function(from_url, to_file) {
  result <- try(
    suppressWarnings(download.file(from_url, to_file, quiet = quietly)),
    silent = quietly
  )
  succeeded <- !inherits(result, "try-error")
  succeeded && result == 0
}
54
# Try to fetch a prebuilt C++ binary bundle for `os`.
# `os` is a "distro-version" string, or NULL when no binary should be used.
# Returns the path of the downloaded zip (a temp file), or NULL when `os` is
# NULL or the download did not succeed.
download_binary <- function(os = identify_os()) {
  if (is.null(os)) {
    return(NULL)
  }

  # See if we can map this os-version to one we have binaries for
  os <- find_available_binary(os)
  binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
  libfile <- tempfile()

  if (!try_download(binary_url, libfile)) {
    cat(sprintf("*** No C++ binaries found for %s\n", os))
    return(NULL)
  }

  cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
  if (!identical(os, "centos-7")) {
    # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do
    # TODO: actually check for system requirements?
    cat("**** Binary package requires libcurl and openssl\n")
    cat("**** If installation fails, retry after installing those system requirements\n")
  }
  libfile
}
78
# Decide which flavor of prebuilt binary to look for, if any.
# `os` defaults to the env var LIBARROW_BINARY, falling back to
# LIBARROW_DOWNLOAD when that is unset. Its value is interpreted as:
# * "" or "false" (not case-sensitive): do not use a binary -> NULL
# * "true" (not case-sensitive): detect the current OS via distro() and
#   return "<id>-<short_version>", or NULL if it cannot be identified
# * anything else: taken to be a "distro-version" chosen by the user and
#   returned unchanged
identify_os <- function(os = Sys.getenv("LIBARROW_BINARY", Sys.getenv("LIBARROW_DOWNLOAD"))) {
  setting <- tolower(os)
  if (setting %in% c("", "false")) {
    # Env var says not to download a binary
    return(NULL)
  }
  if (!identical(setting, "true")) {
    # An explicit os-version was given--maybe you're on Ubuntu 18.10 but we
    # only build for 18.04 and that's fine--so use what the user set
    return(os)
  }

  linux <- distro()
  if (is.null(linux)) {
    cat("*** Unable to identify current OS/version\n")
    return(NULL)
  }
  paste(linux$id, linux$short_version, sep = "-")
}
102
103#### start distro ####
104
# Identify the Linux distribution this script is running on.
# Returns NULL when nothing could be detected; otherwise a list with `id`
# (lower-cased), `version`, `codename` and a derived `short_version`
# ("major.minor" for ubuntu, "11" for a bullseye codename, major only otherwise).
distro <- function() {
  # The code in this script is a (potentially stale) copy of the distro package
  if (requireNamespace("distro", quietly = TRUE)) {
    # Use the version from the package, which may be updated from this
    return(distro::distro())
  }

  # Try the detection sources in order; the first non-NULL answer wins
  out <- lsb_release()
  if (is.null(out)) {
    out <- os_release()
  }
  if (is.null(out)) {
    out <- system_release()
  }
  if (is.null(out)) {
    return(NULL)
  }

  out$id <- tolower(out$id)
  # Fields may be missing (NULL/zero-length) depending on the source, so both
  # tests below must come out FALSE rather than error in that case.
  is_bullseye <- isTRUE(grepl("bullseye", out$codename))
  is_ubuntu <- length(out$id) == 1 && !is.na(out$id) && out$id == "ubuntu"
  if (is_bullseye) {
    # debian unstable doesn't include a number but we can map from pretty name
    out$short_version <- "11"
  } else if (is_ubuntu) {
    # Keep major.minor version
    out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
  } else {
    # Only major version number
    out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
  }
  out
}
136
# Query the `lsb_release` command for distributor id, release and codename.
# Returns NULL when the command is not available, otherwise a list with
# elements `id`, `version` and `codename` holding the raw command output.
lsb_release <- function() {
  if (!have_lsb_release()) {
    return(NULL)
  }
  flags <- c(id = "-is", version = "-rs", codename = "-cs")
  lapply(flags, call_lsb)
}
148
# TRUE when Sys.which() finds an `lsb_release` executable
have_lsb_release <- function() {
  lsb_path <- Sys.which("lsb_release")
  nzchar(lsb_path)
}
# Run `lsb_release <args>` and return its output lines as a character vector
call_lsb <- function(args) {
  command <- paste("lsb_release", args)
  system(command, intern = TRUE)
}
151
# Parse the KEY=value lines supplied by read_os_release().
# Returns NULL when there is no data, otherwise a list with `id` (ID),
# `version` (VERSION_ID) and `codename` (VERSION_CODENAME, or PRETTY_NAME when
# no codename key is present). Keys that are absent yield NULL entries.
os_release <- function() {
  rel_data <- read_os_release()
  if (is.null(rel_data)) {
    return(NULL)
  }

  # Only KEY=value lines carry data. Split each one at its FIRST "=" so that
  # a value which itself contains "=" stays intact and keeps its key.
  rel_data <- grep("^[^=]+=", rel_data, value = TRUE)
  raw_values <- sub("^[^=]+=", "", rel_data)
  # Drop one surrounding double quote on either side, if present
  vals <- as.list(gsub('^"|"$', "", raw_values))
  names(vals) <- sub("=.*$", "", rel_data)

  out <- list(
    id = vals[["ID"]],
    version = vals[["VERSION_ID"]]
  )
  if ("VERSION_CODENAME" %in% names(vals)) {
    out$codename <- vals[["VERSION_CODENAME"]]
  } else {
    # This probably isn't right, maybe could extract codename from pretty name?
    out$codename <- vals[["PRETTY_NAME"]]
  }
  out
}
173
# Read /etc/os-release as a character vector of lines.
# Returns NULL (invisibly) when the file does not exist.
read_os_release <- function() {
  os_release_file <- "/etc/os-release"
  if (!file.exists(os_release_file)) {
    return(invisible(NULL))
  }
  readLines(os_release_file)
}
179
# Derive distro id and version from the text supplied by read_system_release().
# Returns NULL when there is no text, otherwise a list with `id`, `version`
# and `codename` (always NA here).
system_release <- function() {
  rel_data <- read_system_release()
  if (is.null(rel_data)) {
    return(NULL)
  }
  # Something like "CentOS Linux release 7.7.1908 (Core)":
  # group 1 is the leading word, group 2 the dotted version number
  release_pattern <- "^([a-zA-Z]+) .* ([0-9.]+).*$"
  list(
    id = sub(release_pattern, "\\1", rel_data),
    version = sub(release_pattern, "\\2", rel_data),
    codename = NA
  )
}
193
# Read the first line of /etc/system-release.
# Returns NULL when the file does not exist or is empty, so that callers can
# test the result with is.null() instead of hitting a readLines() error.
read_system_release <- function() {
  release_file <- "/etc/system-release"
  if (!file.exists(release_file)) {
    return(NULL)
  }
  first_line <- utils::head(readLines(release_file), 1)
  if (length(first_line) == 0) {
    return(NULL)
  }
  first_line
}
195
196#### end distro ####
197
# Map an os-version string to the one whose binaries should be used instead.
# A csv with columns "actual" and "use_this" is read from the URL below; when
# `os` has exactly one non-NA mapping there, that value is returned, otherwise
# `os` comes back unchanged (including when the csv cannot be read).
find_available_binary <- function(os) {
  map_url <- "https://raw.githubusercontent.com/ursa-labs/arrow-r-nightly/master/linux/distro-map.csv"
  distro_map <- try(utils::read.csv(map_url, stringsAsFactors = FALSE), silent = quietly)
  if (inherits(distro_map, "try-error") || !(os %in% distro_map$actual)) {
    return(os)
  }

  replacement <- distro_map$use_this[distro_map$actual == os]
  if (length(replacement) == 1 && !is.na(replacement)) { # Just some sanity checking
    cat(sprintf("*** Using %s binary for %s\n", replacement, os))
    os <- replacement
  }
  os
}
211
# Download and unpack the Arrow C++ source.
# Tries the bintray zip first, then an official Apache release tarball.
# Returns the path of the unpacked "cpp" directory (also recorded in the
# .arrow.cleanup option), or NULL when no such directory was obtained.
download_source <- function() {
  tf1 <- tempfile()
  src_dir <- tempfile()
  if (bintray_download(tf1)) {
    # First try from bintray
    cat("*** Successfully retrieved C++ source\n")
    unzip(tf1, exdir = src_dir)
    unlink(tf1)
    src_dir <- paste0(src_dir, "/cpp")
  } else if (apache_download(tf1)) {
    # If that fails, try for an official release
    cat("*** Successfully retrieved C++ source\n")
    untar(tf1, exdir = src_dir)
    unlink(tf1)
    src_dir <- paste0(src_dir, "/apache-arrow-", VERSION, "/cpp")
  }

  # Nothing was unpacked (or the archive had no cpp directory)
  if (!dir.exists(src_dir)) {
    return(NULL)
  }

  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
  # These scripts need to be executable. Done from R rather than through a
  # shell command so the path never needs quoting; use_umask = FALSE applies
  # exactly 0755, as `chmod 755` would.
  scripts <- Sys.glob(file.path(src_dir, "build-support", "*.sh"))
  Sys.chmod(scripts, mode = "0755", use_umask = FALSE)
  src_dir
}
241
# Download the source zip for VERSION from arrow_repo into `destfile`.
# Returns whatever try_download() reports for that URL.
bintray_download <- function(destfile) {
  zip_name <- paste0("arrow-", VERSION, ".zip")
  try_download(paste0(arrow_repo, "src/", zip_name), destfile)
}
246
# Download the official release tarball for VERSION into `destfile`.
# The mirror-selector URL is tried `n_mirrors` times (it returns a different
# mirror each time), then downloads.apache.org as the backup. Stops at the
# first success and returns the result of the last attempt.
apache_download <- function(destfile, n_mirrors = 3) {
  apache_path <- paste0("arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz")
  mirror_selector <- "https://www.apache.org/dyn/closer.lua?action=download&filename="
  backup <- "https://downloads.apache.org/"
  candidates <- paste0(c(rep(mirror_selector, n_mirrors), backup), apache_path)

  downloaded <- FALSE
  attempt <- 0
  while (!downloaded && attempt < length(candidates)) {
    attempt <- attempt + 1
    downloaded <- try_download(candidates[attempt], destfile)
  }
  downloaded
}
263
# Look for Arrow C++ source in a local checkout.
# `arrow_home` defaults to the env var ARROW_HOME, or ".." when that is unset.
# Returns "<arrow_home>/cpp" when cpp/src/arrow/api.h exists there, else NULL.
find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
  marker <- paste0(arrow_home, "/cpp/src/arrow/api.h")
  if (!file.exists(marker)) {
    return(NULL)
  }
  # We're in a git checkout of arrow, so we can build it
  cat("*** Found local C++ source\n")
  paste0(arrow_home, "/cpp")
}
273
# Build the Arrow C++ static libraries from source.
#
# src_dir: path of the C++ source directory, passed on as SOURCE_DIR
# dst_dir: install location, passed on as DEST_DIR
#
# Runs inst/build_arrow_static.sh with the build settings and R's own compiler
# configuration given as environment variables. Invisibly returns the exit
# status of that script; a non-zero status only prints a message, it does not
# raise an error.
build_libarrow <- function(src_dir, dst_dir) {
  # We'll need to compile R bindings with these libs, so delete any .o files
  system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE)
  # Set up make for parallel building
  makeflags <- Sys.getenv("MAKEFLAGS")
  if (makeflags == "") {
    # CRAN policy says not to use more than 2 cores during checks
    # If you have more and want to use more, set MAKEFLAGS
    # detectCores() can return NA; na.rm keeps that from turning into "-jNA"
    ncores <- min(parallel::detectCores(), 2, na.rm = TRUE)
    makeflags <- sprintf("-j%s", ncores)
    Sys.setenv(MAKEFLAGS = makeflags)
  }
  if (!quietly) {
    cat("*** Building with MAKEFLAGS=", makeflags, "\n")
  }
  # Check for libarrow build dependencies:
  # * cmake
  cmake <- ensure_cmake()

  # Optionally build somewhere not in tmp so we can dissect the build if it fails
  debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR")
  if (nzchar(debug_dir)) {
    # The user asked to keep this directory, so it is not registered for cleanup
    build_dir <- debug_dir
  } else {
    # But normally we'll just build in a tmp dir
    build_dir <- tempfile()
    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir))
  }

  # Ask `R CMD config` for one of R's build settings
  R_CMD_config <- function(var) {
    if (getRversion() < "3.4") {
      # var names were called CXX1X instead of CXX11
      var <- sub("^CXX11", "CXX1X", var)
    }
    # tools::Rcmd introduced R 3.3
    tools::Rcmd(paste("config", var), stdout = TRUE)
  }
  env_var_list <- c(
    SOURCE_DIR = src_dir,
    BUILD_DIR = build_dir,
    DEST_DIR = dst_dir,
    CMAKE = cmake,
    # Make sure we build with the same compiler settings that R is using
    CC = R_CMD_config("CC"),
    CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")),
    # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug symbols
    LDFLAGS = R_CMD_config("LDFLAGS")
  )
  # Flatten to a single `NAME="value" NAME="value" ...` shell prefix
  env_vars <- paste0(names(env_var_list), '="', env_var_list, '"', collapse = " ")
  env_vars <- with_s3_support(env_vars)
  env_vars <- with_mimalloc(env_vars)
  if (tolower(Sys.info()[["sysname"]]) %in% "sunos") {
    # jemalloc doesn't seem to build on Solaris
    env_vars <- paste(env_vars, "ARROW_JEMALLOC=OFF")
  }
  settings_note <- if (quietly) "" else paste("with", env_vars)
  cat("**** arrow", settings_note, "\n")
  status <- system(
    paste(env_vars, "inst/build_arrow_static.sh"),
    ignore.stdout = quietly, ignore.stderr = quietly
  )
  if (status != 0) {
    # It failed :(
    cat("**** Error building Arrow C++. Re-run with ARROW_R_DEV=true for debug information.\n")
  }
  invisible(status)
}
340
# Return the path of a usable cmake, downloading one if necessary.
# Candidates are the CMAKE env var, then `cmake` and `cmake3` on the PATH; the
# first one accepted by find_cmake() is returned. Otherwise a Linux x86_64
# cmake release (version from the CMAKE_VERSION env var, default 3.19.2) is
# downloaded and unpacked into a temp dir, and the path of its bin/cmake is
# returned. If that download fails a message is printed and the same path is
# still returned, even though nothing exists there.
ensure_cmake <- function() {
  cmake <- find_cmake(c(
    Sys.getenv("CMAKE"),
    Sys.which("cmake"),
    Sys.which("cmake3")
  ))
  if (!is.null(cmake)) {
    return(cmake)
  }

  # If not found, download it
  cat("**** cmake\n")
  CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.19.2")
  cmake_release <- paste0("cmake-", CMAKE_VERSION, "-Linux-x86_64")
  cmake_binary_url <- paste0(
    "https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
    "/", cmake_release, ".tar.gz"
  )
  cmake_tar <- tempfile()
  cmake_dir <- tempfile()
  if (try_download(cmake_binary_url, cmake_tar)) {
    untar(cmake_tar, exdir = cmake_dir)
    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
  } else {
    # Don't try to unpack a file that was never written
    cat("**** Failed to download cmake from", cmake_binary_url, "\n")
  }
  unlink(cmake_tar)
  paste0(cmake_dir, "/", cmake_release, "/bin/cmake")
}
370
# Given candidate cmake paths, return the first non-empty one whose version is
# at least `version_required`, without names. Returns NULL when none qualifies.
# A candidate whose version cannot be determined is skipped.
find_cmake <- function(paths, version_required = 3.2) {
  # Compare version objects, not numbers: going through a string avoids
  # coercing a bare double to a version inside the comparison
  required <- numeric_version(as.character(version_required))
  for (path in paths) {
    if (!nzchar(path)) {
      next
    }
    found <- cmake_version(path)
    # cmake_version() may hand back a plain number or an empty version when the
    # command is unusable; only a single real version can qualify
    new_enough <- inherits(found, "numeric_version") &&
      length(found) == 1 &&
      found >= required
    if (isTRUE(new_enough)) {
      # Sys.which() returns a named vector, but that plays badly with c() later
      return(unname(path))
    }
  }
  # If none found, return NULL
  NULL
}
383
# Run `<cmd> --version` and return the first dotted version number in its
# output as a package_version. Returns the number 0 when the command fails or
# prints nothing that looks like a version.
cmake_version <- function(cmd = "cmake") {
  tryCatch(
    {
      raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE)
      # Digits separated by dots; any trailing suffix such as "-rc1" is left out
      pat <- "[0-9]+(\\.[0-9]+)+"
      hits <- regmatches(raw_version, regexpr(pat, raw_version))
      if (length(hits) == 0) {
        # The command ran (or the shell reported it missing) but gave no version
        0
      } else {
        package_version(hits[[1]])
      }
    },
    error = function(e) 0
  )
}
395
# Append ARROW_S3=ON or ARROW_S3=OFF to the `env_vars` string.
# S3 is requested when the env var ARROW_S3 is "ON" or LIBARROW_MINIMAL is
# "false" (both not case-sensitive). A request is only honored when the
# compiler is not gcc < 4.9 and cmake can find CURL and OpenSSL >= 1.0.2;
# otherwise the reason is printed and OFF is used.
with_s3_support <- function(env_vars) {
  s3_requested <- toupper(Sys.getenv("ARROW_S3")) == "ON" ||
    tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"

  s3_setting <- "ARROW_S3=OFF"
  if (s3_requested) {
    # User wants S3 support. If they're using gcc, let's make sure the version is >= 4.9
    # and make sure that we have curl and openssl system libs
    if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
      cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n")
    } else if (!cmake_find_package("CURL", NULL, env_vars)) {
      cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n")
    } else if (!cmake_find_package("OpenSSL", "1.0.2", env_vars)) {
      cat("**** S3 support requires openssl-devel (rpm) or libssl-dev (deb), version >= 1.0.2; building with ARROW_S3=OFF\n")
    } else {
      s3_setting <- "ARROW_S3=ON"
    }
  }
  paste(env_vars, s3_setting)
}
414
# Append ARROW_MIMALLOC=ON or ARROW_MIMALLOC=OFF to the `env_vars` string.
# mimalloc is requested when the env var ARROW_MIMALLOC is "ON" or
# LIBARROW_MINIMAL is "false" (both not case-sensitive); a request is dropped,
# with a message, when the compiler is gcc < 4.9.
with_mimalloc <- function(env_vars) {
  mimalloc_requested <- toupper(Sys.getenv("ARROW_MIMALLOC")) == "ON" ||
    tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"

  mimalloc_setting <- "ARROW_MIMALLOC=OFF"
  if (mimalloc_requested) {
    # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9
    if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
      cat("**** mimalloc support not available for gcc < 4.9; building with ARROW_MIMALLOC=OFF\n")
    } else {
      mimalloc_setting <- "ARROW_MIMALLOC=ON"
    }
  }
  paste(env_vars, mimalloc_setting)
}
426
# Version of the C++ compiler as reported by cmake, when that compiler is gcc.
# Returns a package_version if CMAKE_CXX_COMPILER_ID is "GNU", otherwise NA,
# so always enclose calls to it in isTRUE() or isFALSE().
cmake_gcc_version <- function(env_vars) {
  compiler <- cmake_cxx_compiler_vars(env_vars)
  uses_gcc <- identical(compiler[["CMAKE_CXX_COMPILER_ID"]], "GNU")
  if (uses_gcc) {
    package_version(compiler[["CMAKE_CXX_COMPILER_VERSION"]])
  } else {
    NA
  }
}
436
# Collect cmake's CMAKE_CXX_COMPILER* settings.
# Runs `$CMAKE --system-information` with `env_vars` exported, keeps the lines
# of the form `NAME value`, and returns a named list of the values (surrounding
# double quotes removed) whose names start with CMAKE_CXX_COMPILER.
cmake_cxx_compiler_vars <- function(env_vars) {
  command <- paste("export", env_vars, "&& $CMAKE --system-information")
  report <- system(command, intern = TRUE)
  settings <- grep("^[A-Z_]* .*$", report, value = TRUE)

  keys <- sub("^(.*?) .*$", "\\1", settings)
  vals <- as.list(sub('^.*? "?(.*?)"?$', "\\1", settings))
  names(vals) <- keys
  vals[grepl("^CMAKE_CXX_COMPILER_?", keys)]
}
444
# Ask cmake whether a package can be found on this system.
#
# pkg:      package name for cmake's find_package()
# version:  optional minimum version string; NULL for no version requirement
# env_vars: environment settings to export first; must define CMAKE, which is
#           the command that gets run
#
# Writes a one-line CMakeLists.txt into a fresh temp dir (recorded in the
# .arrow.cleanup option) and configures it. Returns TRUE when cmake exits with
# status 0, FALSE otherwise.
cmake_find_package <- function(pkg, version = NULL, env_vars) {
  td <- tempfile()
  dir.create(td)
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td))
  find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)")
  writeLines(find_package, file.path(td, "CMakeLists.txt"))
  cmake_cmd <- paste0(
    "export ", env_vars,
    # Quote the directory so a temp path containing spaces or shell
    # metacharacters cannot break the command line
    " && cd ", shQuote(td),
    " && $CMAKE ",
    " -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON",
    " -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON",
    " ."
  )
  system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0
}
461
462#####
463
if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) {
  # Nothing to do when the headers are already in dst_dir (e.g. a local
  # checkout where the libs were built before). Otherwise:
  # (1) Look for a prebuilt binary for this version, if that is allowed
  src_dir <- NULL
  bin_file <- if (download_ok && binary_ok) download_binary() else NULL

  # (2) Without a binary, and if building is allowed, locate source:
  #     downloaded first (when permitted), then a local checkout
  if (is.null(bin_file) && build_ok) {
    if (download_ok) {
      src_dir <- download_source()
    }
    if (is.null(src_dir)) {
      src_dir <- find_local_source()
    }
  }

  if (!is.null(bin_file)) {
    # Extract the prebuilt libraries
    dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
    unzip(bin_file, exdir = dst_dir)
    unlink(bin_file)
  } else if (!is.null(src_dir)) {
    cat("*** Building C++ libraries\n")
    build_libarrow(src_dir, dst_dir)
  } else {
    # No binary, and either building is disabled or no source was found
    cat("*** Proceeding without C++ dependencies\n")
  }
}
495