1library(testthat)
2library(recipes)
3
# Shared fixture for the correlation-filter tests below.
# Five standard-normal columns (V1..V5) are drawn first, then two derived
# columns are appended:
#   * `duplicate` is an exact copy of V1;
#   * `V6` is the negation of V2 plus uniform noise scaled by 0.2.
# The seed is set once, so the rnorm() draw followed by the runif() draw is
# reproducible.
n <- 100
set.seed(424)
dat <- as.data.frame(matrix(rnorm(5 * n), ncol = 5))
dat[["duplicate"]] <- dat[["V1"]]
dat[["V6"]] <- -dat[["V2"]] + 0.2 * runif(n)
10
# With a 0.5 threshold the trained step is expected to record V6 and V1
# (in that order) as the columns it removes.
test_that("high filter", {
  set.seed(1)
  corr_rec <- step_corr(
    recipe(~ ., data = dat),
    all_predictors(),
    threshold = .5
  )

  trained <- prep(corr_rec, training = dat, verbose = FALSE)

  expected_removals <- c("V6", "V1")
  corr_step <- trained$steps[[1]]

  expect_equal(corr_step$removals, expected_removals)
})
23
# With the threshold at 1 nothing should be filtered: the trained step's
# `removals` is expected to equal an empty numeric vector.
test_that("low filter", {
  corr_rec <- step_corr(
    recipe(~ ., data = dat),
    all_predictors(),
    threshold = 1
  )

  trained <- prep(corr_rec, training = dat, verbose = FALSE)
  corr_step <- trained$steps[[1]]

  expect_equal(corr_step$removals, numeric(0))
})
33
# One column (V4) is replaced entirely by NA. Training is expected to warn
# that 1 column was excluded from the filter and to record V1 and V2 as
# the removed columns.
test_that("many missing values", {
  all_na_dat <- dat
  all_na_dat$V4 <- NA_real_

  corr_rec <- step_corr(
    recipe(~ ., data = all_na_dat),
    all_predictors(),
    threshold = .25
  )

  # The assignment lives inside expect_warning() so the trained recipe is
  # still available after the warning has been captured.
  expect_warning(
    trained <- prep(corr_rec, training = all_na_dat, verbose = FALSE),
    "1 columns were excluded from the filter"
  )

  expect_equal(trained$steps[[1]]$removals, c("V1", "V2"))
})
48
# A single NA is planted in V1 (row 1) and in V4 (row 10), and the step is
# asked to compute correlations with use = "everything". Training is
# expected to warn that some columns were excluded and to remove only V2.
test_that("occasional missing values", {
  sparse_na_dat <- dat
  sparse_na_dat$V1[1] <- NA_real_
  sparse_na_dat$V4[10] <- NA_real_

  corr_rec <- step_corr(
    recipe(~ ., data = sparse_na_dat),
    all_predictors(),
    threshold = .25,
    use = "everything"
  )

  # The assignment lives inside expect_warning() so the trained recipe is
  # still available after the warning has been captured.
  expect_warning(
    trained <- prep(corr_rec, training = sparse_na_dat, verbose = FALSE),
    "Some columns were excluded from the filter"
  )

  expect_equal(trained$steps[[1]]$removals, "V2")
})
64
65
# Smoke test: printing the untrained recipe and running a verbose prep()
# must each write something to the console.
test_that("printing", {
  set.seed(1)
  corr_rec <- step_corr(
    recipe(~ ., data = dat),
    all_predictors(),
    threshold = .5
  )

  expect_output(print(corr_rec))
  expect_output(prep(corr_rec, training = dat, verbose = TRUE))
})
74
75
# Checks the tunable-parameter table reported for a correlation step:
# a single row named "threshold", sourced from "recipe", with a list-typed
# `call_info` column and the five expected column names.
test_that("tunable", {
  rec <- step_corr(recipe(~ ., data = iris), all_predictors())
  corr_step <- rec$steps[[1]]

  # The method is invoked directly rather than through a generic.
  rec_param <- tunable.step_corr(corr_step)

  expected_cols <- c(
    "name", "call_info", "source", "component", "component_id"
  )

  expect_equal(rec_param$name, "threshold")
  expect_true(all(rec_param$source == "recipe"))
  expect_true(is.list(rec_param$call_info))
  expect_equal(nrow(rec_param), 1)
  expect_equal(names(rec_param), expected_cols)
})
90