#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

context("test functions in sparkR.R")

test_that("Check masked functions", {
  # Check that we are not masking any new function from base, stats, testthat unexpectedly
  # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it
  # hard for users to use base R functions. Please check when in doubt.
  namesOfMaskedCompletely <- c("cov", "filter", "sample")
  namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var",
                     "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset",
                     "summary", "transform", "drop", "window", "as.data.frame", "union")
  if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
    namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
  }
  masked <- conflicts(detail = TRUE)$`package:SparkR`
  expect_true("describe" %in% masked)  # only when run with testthat
  func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] })
  funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func)
  maskedBySparkR <- masked[funcSparkROrEmpty]
  expect_equal(length(maskedBySparkR), length(namesOfMasked))
  # make the two lists the same length so expect_equal will print their content
  l <- max(length(maskedBySparkR), length(namesOfMasked))
  length(maskedBySparkR) <- l
  length(namesOfMasked) <- l
  expect_equal(sort(maskedBySparkR, na.last = TRUE), sort(namesOfMasked, na.last = TRUE))
  # above are those reported as masked when `library(SparkR)` is attached
  # note that many of these methods are still callable without the base:: or stats:: prefix
  # there should be a test for each of these, except the following, which are currently "broken"
  funcHasAny <- unlist(lapply(masked, function(x) {
    any(grepl("=\"ANY\"", capture.output(showMethods(x)[-1])))
  }))
  maskedCompletely <- masked[!funcHasAny]
  expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely))
  l <- max(length(maskedCompletely), length(namesOfMaskedCompletely))
  length(maskedCompletely) <- l
  length(namesOfMaskedCompletely) <- l
  expect_equal(sort(maskedCompletely, na.last = TRUE),
               sort(namesOfMaskedCompletely, na.last = TRUE))
})

test_that("repeatedly starting and stopping SparkR", {
  for (i in 1:4) {
    sc <- suppressWarnings(sparkR.init())
    rdd <- parallelize(sc, 1:20, 2L)
    expect_equal(countRDD(rdd), 20)
    suppressWarnings(sparkR.stop())
  }
})

test_that("repeatedly starting and stopping SparkSession", {
  for (i in 1:4) {
    sparkR.session(enableHiveSupport = FALSE)
    df <- createDataFrame(data.frame(dummy = 1:i))
    expect_equal(count(df), i)
    sparkR.session.stop()
  }
})

test_that("rdd GC across sparkR.stop", {
  sc <- sparkR.sparkContext() # sc should get id 0
  rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1
  rdd2 <- parallelize(sc, 1:10, 2L) # rdd2 should get id 2
  sparkR.session.stop()

  sc <- sparkR.sparkContext() # sc should get id 0 again

  # GC rdd1 before creating rdd3 and rdd2 after
  rm(rdd1)
  gc()

  rdd3 <- parallelize(sc, 1:20, 2L) # rdd3 should get id 1 now
  rdd4 <- parallelize(sc, 1:10, 2L) # rdd4 should get id 2 now

  rm(rdd2)
  gc()

  countRDD(rdd3)
  countRDD(rdd4)
  sparkR.session.stop()
})

test_that("job group functions can be called", {
  sc <- sparkR.sparkContext()
  setJobGroup("groupId", "job description", TRUE)
  cancelJobGroup("groupId")
  clearJobGroup()

  suppressWarnings(setJobGroup(sc, "groupId", "job description", TRUE))
  suppressWarnings(cancelJobGroup(sc, "groupId"))
  suppressWarnings(clearJobGroup(sc))
  sparkR.session.stop()
})

test_that("utility function can be called", {
  sparkR.sparkContext()
  setLogLevel("ERROR")
  sparkR.session.stop()
})

test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", {
  e <- new.env()
  e[["spark.driver.memory"]] <- "512m"
  ops <- getClientModeSparkSubmitOpts("sparkrmain", e)
  expect_equal("--driver-memory \"512m\" sparkrmain", ops)

  e[["spark.driver.memory"]] <- "5g"
  e[["spark.driver.extraClassPath"]] <- "/opt/class_path" # nolint
  e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings"
  e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" # nolint
  e[["random"]] <- "skipthis"
  ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e)
  # nolint start
  expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"",
                            "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"",
                            "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell"))
  # nolint end

  e[["spark.driver.extraClassPath"]] <- "/" # too short
  ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e)
  # nolint start
  expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ",
                            "-XX:+UseCompressedStrings\" --driver-library-path \"/usr/local/hadoop/lib\"",
                            " --driver-memory 4g sparkr-shell2"))
  # nolint end
})

test_that("sparkJars sparkPackages as comma-separated strings", {
  expect_warning(processSparkJars(" a, b "))
  jars <- suppressWarnings(processSparkJars(" a, b "))
  expect_equal(lapply(jars, basename), list("a", "b"))

  jars <- suppressWarnings(processSparkJars(" abc ,, def "))
  expect_equal(lapply(jars, basename), list("abc", "def"))

  jars <- suppressWarnings(processSparkJars(c(" abc ,, def ", "", "xyz", " ", "a,b")))
  expect_equal(lapply(jars, basename), list("abc", "def", "xyz", "a", "b"))

  p <- processSparkPackages(c("ghi", "lmn"))
  expect_equal(p, c("ghi", "lmn"))

  # check normalizePath
  f <- dir()[[1]]
  expect_warning(processSparkJars(f), NA)
  expect_match(processSparkJars(f), f)
})

test_that("spark.lapply should perform simple transforms", {
  sparkR.sparkContext()
  doubled <- spark.lapply(1:10, function(x) { 2 * x })
  expect_equal(doubled, as.list(2 * 1:10))
  sparkR.session.stop()
})

test_that("add and get file to be downloaded with Spark job on every node", {
  sparkR.sparkContext()
  # Test add file.
  path <- tempfile(pattern = "hello", fileext = ".txt")
  filename <- basename(path)
  words <- "Hello World!"
  writeLines(words, path)
  spark.addFile(path)
  download_path <- spark.getSparkFiles(filename)
  expect_equal(readLines(download_path), words)

  # Test spark.getSparkFiles works well on executors.
  seq <- seq(from = 1, to = 10, length.out = 5)
  f <- function(seq) { spark.getSparkFiles(filename) }
  results <- spark.lapply(seq, f)
  for (i in 1:5) { expect_equal(basename(results[[i]]), filename) }

  unlink(path)

  # Test add directory recursively.
  path <- paste0(tempdir(), "/", "recursive_dir")
  dir.create(path)
  dir_name <- basename(path)
  path1 <- paste0(path, "/", "hello.txt")
  file.create(path1)
  sub_path <- paste0(path, "/", "sub_hello")
  dir.create(sub_path)
  path2 <- paste0(sub_path, "/", "sub_hello.txt")
  file.create(path2)
  words <- "Hello World!"
  sub_words <- "Sub Hello World!"
  writeLines(words, path1)
  writeLines(sub_words, path2)
  spark.addFile(path, recursive = TRUE)
  download_path1 <- spark.getSparkFiles(paste0(dir_name, "/", "hello.txt"))
  expect_equal(readLines(download_path1), words)
  download_path2 <- spark.getSparkFiles(paste0(dir_name, "/", "sub_hello/sub_hello.txt"))
  expect_equal(readLines(download_path2), sub_words)
  unlink(path, recursive = TRUE)
  sparkR.session.stop()
})