library(testthat)
library(recipes)

# Shared fixture: five standard-normal columns (V1..V5), plus `duplicate`
# (an exact copy of V1) and `V6` (the negation of V2 with a small uniform
# perturbation added). The seed and the order of the random draws are fixed so
# the expectations below stay reproducible.
n <- 100
set.seed(424)
dat <- as.data.frame(matrix(rnorm(n * 5), ncol = 5))
dat$duplicate <- dat$V1
dat$V6 <- (runif(n) * 0.2) - dat$V2

# With a 0.5 threshold the filter is expected to drop V6 and V1, in that order.
test_that("high filter", {
  set.seed(1)
  corr_rec <- recipe(~ ., data = dat) %>%
    step_corr(all_predictors(), threshold = 0.5)

  corr_trained <- prep(corr_rec, training = dat, verbose = FALSE)
  corr_step <- corr_trained$steps[[1]]

  expect_equal(corr_step$removals, c("V6", "V1"))
})

# With a threshold of 1 the test expects an empty set of removals.
test_that("low filter", {
  corr_rec <- recipe(~ ., data = dat) %>%
    step_corr(all_predictors(), threshold = 1)

  corr_trained <- prep(corr_rec, training = dat, verbose = FALSE)
  corr_step <- corr_trained$steps[[1]]

  expect_equal(corr_step$removals, numeric(0))
})

# V4 is replaced entirely by NA; prep() is expected to warn about one excluded
# column and still report V1 and V2 as removed.
test_that("many missing values", {
  all_na_dat <- dat
  all_na_dat$V4 <- NA_real_
  corr_rec <- recipe(~ ., data = all_na_dat) %>%
    step_corr(all_predictors(), threshold = 0.25)

  expect_warning(
    corr_trained <- prep(corr_rec, training = all_na_dat, verbose = FALSE),
    regexp = "1 columns were excluded from the filter"
  )

  corr_step <- corr_trained$steps[[1]]
  expect_equal(corr_step$removals, c("V1", "V2"))
})

# A single NA in each of V1 and V4, combined with use = "everything"; prep()
# is expected to warn about excluded columns and report only V2 as removed.
test_that("occasional missing values", {
  sparse_na_dat <- dat
  sparse_na_dat$V1[1] <- NA_real_
  sparse_na_dat$V4[10] <- NA_real_
  corr_rec <- recipe(~ ., data = sparse_na_dat) %>%
    step_corr(all_predictors(), threshold = 0.25, use = "everything")

  expect_warning(
    corr_trained <- prep(corr_rec, training = sparse_na_dat, verbose = FALSE),
    regexp = "Some columns were excluded from the filter"
  )

  corr_step <- corr_trained$steps[[1]]
  expect_equal(corr_step$removals, "V2")
})


# Both printing the untrained recipe and a verbose prep() must emit output.
test_that("printing", {
  set.seed(1)
  corr_rec <- recipe(~ ., data = dat) %>%
    step_corr(all_predictors(), threshold = 0.5)

  expect_output(print(corr_rec))
  expect_output(prep(corr_rec, training = dat, verbose = TRUE))
})


# The tunable method for the step should describe exactly one parameter,
# `threshold`, sourced from "recipe", with the expected set of columns.
test_that("tunable", {
  base_rec <- recipe(~ ., data = iris)
  corr_rec <- step_corr(base_rec, all_predictors())

  tune_info <- tunable.step_corr(corr_rec$steps[[1]])

  expect_equal(tune_info$name, "threshold")
  expect_true(all(tune_info$source == "recipe"))
  expect_true(is.list(tune_info$call_info))
  expect_equal(nrow(tune_info), 1)
  expect_equal(
    names(tune_info),
    c("name", "call_info", "source", "component", "component_id")
  )
})