# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# The first command-line argument is the Arrow version; it determines both the
# URLs that are downloaded and the directory the libraries are placed in.
args <- commandArgs(TRUE)
VERSION <- args[1]
dst_dir <- paste0("libarrow/arrow-", VERSION)

arrow_repo <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/"

# Compare against a version *string*: a double such as 3.4 cannot represent
# versions like 3.10 and is not a valid operand for version comparisons.
if (getRversion() < "3.4.0" && is.null(getOption("download.file.method"))) {
  # default method doesn't work on R 3.3, nor does libcurl
  options(download.file.method = "wget")
}

options(.arrow.cleanup = character()) # To collect dirs to rm on exit
# NOTE(review): unlink() is called without recursive = TRUE, and ?unlink says
# directories are not deleted in that case -- confirm whether this cleanup is
# expected to remove the collected directories.
on.exit(unlink(getOption(".arrow.cleanup")))

# TRUE when environment variable `var`, lower-cased, is exactly `value`.
# An unset variable reads as "".
env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value)
# * no download, build_ok: Only build with local git checkout
# * download_ok, no build: Only use prebuilt binary, if found
# * neither: Get the arrow-without-arrow package
# Download and build are OK unless you say not to
download_ok <- !env_is("LIBARROW_DOWNLOAD", "false")
build_ok <- !env_is("LIBARROW_BUILD", "false")
# But binary defaults to not OK
binary_ok <- !identical(tolower(Sys.getenv("LIBARROW_BINARY", "false")), "false")
# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
quietly <- !env_is("ARROW_R_DEV", "true")

# Download `from_url` to `to_file`, swallowing warnings and errors.
# Returns TRUE only when download.file() completed and reported status 0,
# FALSE otherwise. Output is shown only when `quietly` is FALSE.
try_download <- function(from_url, to_file) {
  status <- try(
    suppressWarnings(
      download.file(from_url, to_file, quiet = quietly)
    ),
    silent = quietly
  )
  # isTRUE() keeps the result a single TRUE/FALSE even for an unexpected status
  !inherits(status, "try-error") && isTRUE(status == 0)
}

# Try to fetch a prebuilt libarrow zip for `os` (an "id-version" string, or
# NULL when no binary should be used).
# Returns the path of the downloaded zip, or NULL when `os` is NULL or no
# binary could be downloaded.
download_binary <- function(os = identify_os()) {
  if (is.null(os)) {
    return(NULL)
  }
  # See if we can map this os-version to one we have binaries for
  os <- find_available_binary(os)
  binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
  libfile <- tempfile()
  if (!try_download(binary_url, libfile)) {
    cat(sprintf("*** No C++ binaries found for %s\n", os))
    # Don't leave a partially written file behind
    unlink(libfile)
    return(NULL)
  }

  cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
  if (!identical(os, "centos-7")) {
    # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do
    # TODO: actually check for system requirements?
    cat("**** Binary package requires libcurl and openssl\n")
    cat("**** If installation fails, retry after installing those system requirements\n")
  }
  libfile
}

# Function to figure out which flavor of binary we should download, if at all.
# By default (unset or "FALSE"), it will not download a precompiled library,
# but you can override this by setting the env var LIBARROW_BINARY to:
# * `TRUE` (not case-sensitive), to try to discover your current OS, or
# * some other string, presumably a related "distro-version" that has binaries
#   built that work for your OS
# Returns an "id-version" string, or NULL when no binary should be downloaded
# or the OS could not be identified.
identify_os <- function(os = Sys.getenv("LIBARROW_BINARY", Sys.getenv("LIBARROW_DOWNLOAD"))) {
  if (tolower(os) %in% c("", "false")) {
    # Env var says not to download a binary
    return(NULL)
  } else if (!identical(tolower(os), "true")) {
    # Env var provided an os-version to use--maybe you're on Ubuntu 18.10 but
    # we only build for 18.04 and that's fine--so use what the user set
    return(os)
  }

  linux <- distro()
  if (is.null(linux)) {
    cat("*** Unable to identify current OS/version\n")
    return(NULL)
  }
  paste(linux$id, linux$short_version, sep = "-")
}

#### start distro ####

# Identify the Linux distribution.
# Returns a list with `id`, `version`, `codename` and `short_version`, or NULL
# when none of lsb_release, /etc/os-release or /etc/system-release is usable.
distro <- function() {
  # The code in this script is a (potentially stale) copy of the distro package
  if (requireNamespace("distro", quietly = TRUE)) {
    # Use the version from the package, which may be updated from this
    return(distro::distro())
  }

  out <- lsb_release()
  if (is.null(out)) {
    out <- os_release()
    if (is.null(out)) {
      out <- system_release()
    }
  }
  if (is.null(out)) {
    return(NULL)
  }

  out$id <- tolower(out$id)
  # isTRUE()/identical() keep these conditions scalar even when a field is
  # missing (NULL) or NA
  if (isTRUE(grepl("bullseye", out$codename))) {
    # debian unstable doesn't include a number but we can map from pretty name
    out$short_version <- "11"
  } else if (identical(out$id, "ubuntu")) {
    # Keep major.minor version
    out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
  } else {
    # Only major version number
    out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
  }
  out
}

# Query the `lsb_release` command, if it is on the PATH.
# Returns list(id, version, codename) or NULL when the command is unavailable.
lsb_release <- function() {
  if (!have_lsb_release()) {
    return(NULL)
  }
  list(
    id = call_lsb("-is"),
    version = call_lsb("-rs"),
    codename = call_lsb("-cs")
  )
}

have_lsb_release <- function() nzchar(Sys.which("lsb_release"))
call_lsb <- function(args) system(paste("lsb_release", args), intern = TRUE)

# Parse /etc/os-release.
# Returns list(id, version, codename) or NULL when the file does not exist.
os_release <- function() {
  rel_data <- read_os_release()
  if (is.null(rel_data)) {
    return(NULL)
  }
  # Only KEY=VALUE lines carry data; skip blank lines and comments
  rel_data <- grep("^[A-Za-z_][A-Za-z0-9_]*=", rel_data, value = TRUE)
  # Split on the *first* "=" so values that themselves contain "=" stay intact
  vals <- as.list(sub('^[^=]*="?(.*?)"?$', "\\1", rel_data))
  names(vals) <- sub("^([^=]*)=.*$", "\\1", rel_data)

  out <- list(
    id = vals[["ID"]],
    version = vals[["VERSION_ID"]]
  )
  if ("VERSION_CODENAME" %in% names(vals)) {
    out$codename <- vals[["VERSION_CODENAME"]]
  } else {
    # This probably isn't right, maybe could extract codename from pretty name?
    out$codename <- vals[["PRETTY_NAME"]]
  }
  out
}

# Lines of /etc/os-release, or NULL when the file does not exist.
read_os_release <- function() {
  if (file.exists("/etc/os-release")) {
    readLines("/etc/os-release")
  }
}

# Parse the first line of /etc/system-release.
# Returns list(id, version, codename = NA) or NULL when the file is missing.
system_release <- function() {
  rel_data <- read_system_release()
  if (is.null(rel_data)) {
    return(NULL)
  }
  # Something like "CentOS Linux release 7.7.1908 (Core)"
  list(
    id = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\1", rel_data),
    version = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\2", rel_data),
    codename = NA
  )
}

# First line of /etc/system-release, or NULL when the file does not exist
# (system_release() relies on that NULL to report "not available").
read_system_release <- function() {
  if (file.exists("/etc/system-release")) {
    utils::head(readLines("/etc/system-release"), 1)
  }
}

#### end distro ####

# Map the detected `os` to the name of a binary build that should work for it,
# using a remote lookup table. Returns `os` unchanged when the table cannot be
# read or has no single, non-NA match.
find_available_binary <- function(os) {
  # Download a csv that maps one to the other, columns "actual" and "use_this"
  u <- "https://raw.githubusercontent.com/ursa-labs/arrow-r-nightly/master/linux/distro-map.csv"
  lookup <- try(utils::read.csv(u, stringsAsFactors = FALSE), silent = quietly)
  if (!inherits(lookup, "try-error") && os %in% lookup$actual) {
    new <- lookup$use_this[lookup$actual == os]
    if (length(new) == 1 && !is.na(new)) { # Just some sanity checking
      cat(sprintf("*** Using %s binary for %s\n", new, os))
      os <- new
    }
  }
  os
}

# Download and unpack the C++ source, first from bintray and then from Apache.
# Returns the path of the unpacked "cpp" directory, or NULL when nothing could
# be downloaded.
download_source <- function() {
  tf1 <- tempfile()
  src_dir <- tempfile()
  if (bintray_download(tf1)) {
    # First try from bintray
    cat("*** Successfully retrieved C++ source\n")
    unzip(tf1, exdir = src_dir)
    unlink(tf1)
    src_dir <- paste0(src_dir, "/cpp")
  } else if (apache_download(tf1)) {
    # If that fails, try for an official release
    cat("*** Successfully retrieved C++ source\n")
    untar(tf1, exdir = src_dir)
    unlink(tf1)
    src_dir <- paste0(src_dir, "/apache-arrow-", VERSION, "/cpp")
  }

  if (!dir.exists(src_dir)) {
    return(NULL)
  }
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
  # These scripts need to be executable. Done from R rather than through a
  # shell so that the path needs no quoting.
  build_scripts <- Sys.glob(file.path(src_dir, "build-support", "*.sh"))
  Sys.chmod(build_scripts, mode = "0755", use_umask = FALSE)
  src_dir
}

# Download the source zip for VERSION from `arrow_repo` to `destfile`.
# Returns TRUE on success.
bintray_download <- function(destfile) {
  source_url <- paste0(arrow_repo, "src/arrow-", VERSION, ".zip")
  try_download(source_url, destfile)
}

# Download the official release tarball for VERSION to `destfile`, trying
# `n_mirrors` mirror lookups and then the backup host.
# Returns TRUE as soon as one attempt succeeds, FALSE if all fail.
apache_download <- function(destfile, n_mirrors = 3) {
  apache_path <- paste0("arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz")
  apache_urls <- c(
    # This returns a different mirror each time
    rep("https://www.apache.org/dyn/closer.lua?action=download&filename=", n_mirrors),
    "https://downloads.apache.org/" # The backup
  )
  downloaded <- FALSE
  for (u in apache_urls) {
    downloaded <- try_download(paste0(u, apache_path), destfile)
    if (downloaded) {
      break
    }
  }
  downloaded
}

# Look for the C++ source in a local checkout rooted at `arrow_home`.
# Returns the path of its "cpp" directory, or NULL when not found.
find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
  if (file.exists(paste0(arrow_home, "/cpp/src/arrow/api.h"))) {
    # We're in a git checkout of arrow, so we can build it
    cat("*** Found local C++ source\n")
    paste0(arrow_home, "/cpp")
  } else {
    NULL
  }
}

# Build libarrow from `src_dir` into `dst_dir` by running
# inst/build_arrow_static.sh with the compiler settings R itself uses.
# Invisibly returns the exit status of the build script (0 on success).
build_libarrow <- function(src_dir, dst_dir) {
  # We'll need to compile R bindings with these libs, so delete any .o files
  unlink(Sys.glob("src/*.o"))
  # Set up make for parallel building
  makeflags <- Sys.getenv("MAKEFLAGS")
  if (makeflags == "") {
    # CRAN policy says not to use more than 2 cores during checks
    # If you have more and want to use more, set MAKEFLAGS
    # detectCores() may return NA; na.rm keeps that from producing "-jNA"
    ncores <- min(parallel::detectCores(), 2, na.rm = TRUE)
    makeflags <- sprintf("-j%s", ncores)
    Sys.setenv(MAKEFLAGS = makeflags)
  }
  if (!quietly) {
    cat("*** Building with MAKEFLAGS=", makeflags, "\n")
  }
  # Check for libarrow build dependencies:
  # * cmake
  cmake <- ensure_cmake()

  # Optionally build somewhere not in tmp so we can dissect the build if it fails
  debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR")
  if (nzchar(debug_dir)) {
    # The user asked to keep this directory, so it is not registered for cleanup
    build_dir <- debug_dir
  } else {
    # But normally we'll just build in a tmp dir
    build_dir <- tempfile()
    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir))
  }

  R_CMD_config <- function(var) {
    if (getRversion() < "3.4.0") {
      # var names were called CXX1X instead of CXX11
      var <- sub("^CXX11", "CXX1X", var)
    }
    # tools::Rcmd introduced R 3.3
    tools::Rcmd(paste("config", var), stdout = TRUE)
  }
  env_var_list <- c(
    SOURCE_DIR = src_dir,
    BUILD_DIR = build_dir,
    DEST_DIR = dst_dir,
    CMAKE = cmake,
    # Make sure we build with the same compiler settings that R is using
    CC = R_CMD_config("CC"),
    CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")),
    # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug symbols
    LDFLAGS = R_CMD_config("LDFLAGS")
  )
  env_vars <- paste0(names(env_var_list), '="', env_var_list, '"', collapse = " ")
  env_vars <- with_s3_support(env_vars)
  env_vars <- with_mimalloc(env_vars)
  if (tolower(Sys.info()[["sysname"]]) %in% "sunos") {
    # jemalloc doesn't seem to build on Solaris
    env_vars <- paste(env_vars, "ARROW_JEMALLOC=OFF")
  }
  cat("**** arrow", if (quietly) "" else paste("with", env_vars), "\n")
  status <- system(
    paste(env_vars, "inst/build_arrow_static.sh"),
    ignore.stdout = quietly, ignore.stderr = quietly
  )
  if (status != 0) {
    # It failed :(
    cat("**** Error building Arrow C++. Re-run with ARROW_R_DEV=true for debug information.\n")
  }
  invisible(status)
}

# Return the path of a usable cmake: the first acceptable one among $CMAKE,
# `cmake` and `cmake3`, otherwise a freshly downloaded binary release.
ensure_cmake <- function() {
  cmake <- find_cmake(c(
    Sys.getenv("CMAKE"),
    Sys.which("cmake"),
    Sys.which("cmake3")
  ))

  if (is.null(cmake)) {
    # If not found, download it
    cat("**** cmake\n")
    CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.19.2")
    cmake_binary_url <- paste0(
      "https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
      "/cmake-", CMAKE_VERSION, "-Linux-x86_64.tar.gz"
    )
    cmake_tar <- tempfile()
    cmake_dir <- tempfile()
    if (!try_download(cmake_binary_url, cmake_tar)) {
      # Say why the following steps are about to fail instead of staying silent
      cat("**** cmake was not found locally and could not be downloaded\n")
      cat("**** Make sure cmake is installed and available on your PATH\n")
    }
    untar(cmake_tar, exdir = cmake_dir)
    unlink(cmake_tar)
    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
    cmake <- paste0(
      cmake_dir,
      "/cmake-", CMAKE_VERSION, "-Linux-x86_64",
      "/bin/cmake"
    )
  }
  cmake
}

# Given a list of possible cmake paths, return the first one that exists and
# is new enough, or NULL if there is none.
# `version_required` may be a version string (preferred) or a number.
find_cmake <- function(paths, version_required = "3.2") {
  # Compare versions as versions: a double cannot represent e.g. 3.10
  version_required <- package_version(as.character(version_required))
  for (path in paths) {
    if (nzchar(path) && isTRUE(cmake_version(path) >= version_required)) {
      # Sys.which() returns a named vector, but that plays badly with c() later
      return(unname(path))
    }
  }
  # If none found, return NULL
  NULL
}

# Version reported by `cmd --version`, as a package_version.
# Returns version 0.0 when the command cannot be run or no version number can
# be found in its output, so that it never passes a minimum-version check.
cmake_version <- function(cmd = "cmake") {
  unknown <- package_version("0.0")
  tryCatch(
    {
      raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE)
      pat <- ".* ([0-9\\.]+).*?"
      version_lines <- grep(pat, raw_version, value = TRUE)
      if (length(version_lines) == 0) {
        unknown
      } else {
        # Use only the first matching line so the result is always length 1
        package_version(sub(pat, "\\1", version_lines[1]))
      }
    },
    error = function(e) unknown
  )
}

# Append ARROW_S3=ON or ARROW_S3=OFF to `env_vars`.
# S3 is requested with ARROW_S3=ON or LIBARROW_MINIMAL=false, and is switched
# off again when gcc is too old or libcurl/openssl cannot be found by cmake.
with_s3_support <- function(env_vars) {
  arrow_s3 <- toupper(Sys.getenv("ARROW_S3")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
  if (arrow_s3) {
    # User wants S3 support. If they're using gcc, let's make sure the version is >= 4.9
    # and make sure that we have curl and openssl system libs
    if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
      cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n")
      arrow_s3 <- FALSE
    } else if (!cmake_find_package("CURL", NULL, env_vars)) {
      cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n")
      arrow_s3 <- FALSE
    } else if (!cmake_find_package("OpenSSL", "1.0.2", env_vars)) {
      cat("**** S3 support requires openssl-devel (rpm) or libssl-dev (deb), version >= 1.0.2; building with ARROW_S3=OFF\n")
      arrow_s3 <- FALSE
    }
  }
  paste(env_vars, if (arrow_s3) "ARROW_S3=ON" else "ARROW_S3=OFF")
}

# Append ARROW_MIMALLOC=ON or ARROW_MIMALLOC=OFF to `env_vars`.
# mimalloc is requested with ARROW_MIMALLOC=ON or LIBARROW_MINIMAL=false, and
# is switched off again when gcc is older than 4.9.
with_mimalloc <- function(env_vars) {
  arrow_mimalloc <- toupper(Sys.getenv("ARROW_MIMALLOC")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
  if (arrow_mimalloc) {
    # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9
    if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
      cat("**** mimalloc support not available for gcc < 4.9; building with ARROW_MIMALLOC=OFF\n")
      arrow_mimalloc <- FALSE
    }
  }
  paste(env_vars, if (arrow_mimalloc) "ARROW_MIMALLOC=ON" else "ARROW_MIMALLOC=OFF")
}

# gcc version as seen by cmake, as a package_version.
# This function returns NA if using a non-gcc compiler (or if cmake reports no
# compiler version).
# Always enclose calls to it in isTRUE() or isFALSE()
cmake_gcc_version <- function(env_vars) {
  vals <- cmake_cxx_compiler_vars(env_vars)
  if (!identical(vals[["CMAKE_CXX_COMPILER_ID"]], "GNU")) {
    return(NA)
  }
  gcc_version <- vals[["CMAKE_CXX_COMPILER_VERSION"]]
  if (is.null(gcc_version)) {
    return(NA)
  }
  package_version(gcc_version)
}

# Run `$CMAKE --system-information` with `env_vars` exported and return the
# reported entries whose names start with CMAKE_CXX_COMPILER, as a named list.
cmake_cxx_compiler_vars <- function(env_vars) {
  info <- system(paste("export", env_vars, "&& $CMAKE --system-information"), intern = TRUE)
  info <- grep("^[A-Z_]* .*$", info, value = TRUE)
  vals <- as.list(sub('^.*? "?(.*?)"?$', "\\1", info))
  names(vals) <- sub("^(.*?) .*$", "\\1", info)
  vals[grepl("^CMAKE_CXX_COMPILER_?", names(vals))]
}

# Ask cmake whether package `pkg` (optionally at least `version`) can be found,
# by configuring a one-line throwaway project with `env_vars` exported.
# Returns TRUE when cmake exits with status 0.
cmake_find_package <- function(pkg, version = NULL, env_vars) {
  td <- tempfile()
  dir.create(td)
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td))
  find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)")
  writeLines(find_package, file.path(td, "CMakeLists.txt"))
  cmake_cmd <- paste0(
    "export ", env_vars,
    " && cd ", td,
    " && $CMAKE ",
    " -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON",
    " -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON",
    " ."
  )
  system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0
}

#####

if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) {
  # If we're working in a local checkout and have already built the libs, we
  # don't need to do anything. Otherwise,
  # (1) Look for a prebuilt binary for this version
  bin_file <- src_dir <- NULL
  if (download_ok && binary_ok) {
    bin_file <- download_binary()
  }
  if (!is.null(bin_file)) {
    # Extract them
    dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
    unzip(bin_file, exdir = dst_dir)
    unlink(bin_file)
  } else if (build_ok) {
    # (2) Find source and build it
    if (download_ok) {
      src_dir <- download_source()
    }
    if (is.null(src_dir)) {
      src_dir <- find_local_source()
    }
    if (!is.null(src_dir)) {
      cat("*** Building C++ libraries\n")
      build_libarrow(src_dir, dst_dir)
    } else {
      cat("*** Proceeding without C++ dependencies\n")
    }
  } else {
    cat("*** Proceeding without C++ dependencies\n")
  }
}