1library(testthat)
2library(recipes)
3library(dplyr)
4library(modeldata)
5
6## -----------------------------------------------------------------------------
7
8data(biomass, package = "modeldata")
9
10biom_tr <- biomass %>% dplyr::filter(dataset == "Training") %>% dplyr::select(-dataset, -sample)
11biom_te <- biomass %>% dplyr::filter(dataset == "Testing")  %>% dplyr::select(-dataset, -sample, -HHV)
12
13data(cells, package = "modeldata")
14
15cell_tr <- cells %>% dplyr::filter(case == "Train") %>% dplyr::select(-case)
16cell_te <- cells %>% dplyr::filter(case == "Test")  %>% dplyr::select(-case, -class)
17
18load(test_path("test_pls_new.RData"))
19
20
21## -----------------------------------------------------------------------------
22
23test_that('PLS, dense loadings', {
24  skip_if_not_installed("mixOmics")
25  rec <- recipe(HHV ~ ., data = biom_tr) %>%
26    step_pls(all_predictors(), outcome = "HHV", num_comp = 3)
27
28  rec <- prep(rec)
29
30  expect_equal(
31    names(rec$steps[[1]]$res),
32    c("mu", "sd", "coefs", "col_norms")
33  )
34
35  tr_new <- juice(rec, all_predictors())
36  expect_equal(tr_new, bm_pls_tr)
37  te_new <- bake(rec, biom_te)
38  expect_equal(te_new, bm_pls_te)
39})
40
41
42test_that('PLS, sparse loadings', {
43  skip_if_not_installed("mixOmics")
44  rec <- recipe(HHV ~ ., data = biom_tr) %>%
45    step_pls(all_predictors(), outcome = "HHV", num_comp = 3, predictor_prop = 3/5)
46
47  rec <- prep(rec)
48
49  expect_equal(
50    names(rec$steps[[1]]$res),
51    c("mu", "sd", "coefs", "col_norms")
52  )
53
54  tr_new <- juice(rec, all_predictors())
55  expect_equal(tr_new, bm_spls_tr)
56  te_new <- bake(rec, biom_te)
57  expect_equal(te_new, bm_spls_te)
58})
59
60## -----------------------------------------------------------------------------
61
62test_that('PLS-DA, dense loadings', {
63  skip_if_not_installed("mixOmics")
64  rec <- recipe(class ~ ., data = cell_tr) %>%
65    step_pls(all_predictors(), outcome = "class", num_comp = 3)
66
67  rec <- prep(rec)
68
69  expect_equal(
70    names(rec$steps[[1]]$res),
71    c("mu", "sd", "coefs", "col_norms")
72  )
73
74  tr_new <- juice(rec, all_predictors())
75  expect_equal(tr_new, cell_plsda_tr)
76  te_new <- bake(rec, cell_te)
77  expect_equal(te_new, cell_plsda_te)
78})
79
80
81test_that('PLS-DA, sparse loadings', {
82  skip_if_not_installed("mixOmics")
83  rec <- recipe(class ~ ., data = cell_tr) %>%
84    step_pls(all_predictors(), outcome = "class", num_comp = 3, predictor_prop = 50/56)
85
86  rec <- prep(rec)
87
88  expect_equal(
89    names(rec$steps[[1]]$res),
90    c("mu", "sd", "coefs", "col_norms")
91  )
92
93  tr_new <- juice(rec, all_predictors())
94  expect_equal(tr_new, cell_splsda_tr)
95  te_new <- bake(rec, cell_te)
96  expect_equal(te_new, cell_splsda_te)
97})
98
99## -----------------------------------------------------------------------------
100
101test_that('No PLS', {
102  skip_if_not_installed("mixOmics")
103  rec <- recipe(class ~ ., data = cell_tr) %>%
104    step_pls(all_predictors(), outcome = "class", num_comp = 0)
105
106  rec <- prep(rec)
107
108  expect_equal(
109    names(rec$steps[[1]]$res),
110    c("x_vars", "y_vars")
111  )
112  pred_names <- summary(rec)$variable[summary(rec)$role == "predictor"]
113
114  tr_new <- juice(rec, all_predictors())
115  expect_equal(names(tr_new), pred_names)
116  te_new <- bake(rec, cell_te, all_predictors())
117  expect_equal(names(te_new), pred_names)
118})
119
120## -----------------------------------------------------------------------------
121
122test_that('tidy method', {
123  skip_if_not_installed("mixOmics")
124  rec <- recipe(HHV ~ ., data = biom_tr) %>%
125    step_pls(all_predictors(), outcome = "HHV", num_comp = 3, id = "dork")
126
127  tidy_pre <- tidy(rec, number = 1)
128  exp_pre <- tibble::tribble(
129    ~terms, ~value, ~component,    ~id,
130    "all_predictors()",     NA_real_,         NA_character_, "dork"
131  )
132  expect_equal(tidy_pre, exp_pre)
133
134  rec <- prep(rec)
135  tidy_post <- tidy(rec, number = 1)
136  exp_post <-
137    tibble::tribble(
138      ~terms,             ~value, ~component,    ~id,
139      "carbon",    0.82813459059393,      "PLS1", "dork",
140      "carbon",    0.718469477422311,     "PLS2", "dork",
141      "carbon",    0.476111929729498,     "PLS3", "dork",
142      "hydrogen", -0.206963356355556,     "PLS1", "dork",
143      "hydrogen",  0.642998926998282,     "PLS2", "dork",
144      "hydrogen",  0.262836631090453,     "PLS3", "dork",
145      "oxygen",   -0.49241242430895,      "PLS1", "dork",
146      "oxygen",    0.299176769170812,     "PLS2", "dork",
147      "oxygen",    0.418081563632953,     "PLS3", "dork",
148      "nitrogen", -0.122633995804743,     "PLS1", "dork",
149      "nitrogen", -0.172719084680244,     "PLS2", "dork",
150      "nitrogen",  0.642403301090588,     "PLS3", "dork",
151      "sulfur",    0.11768677260853,      "PLS1", "dork",
152      "sulfur",   -0.217341766567037,     "PLS2", "dork",
153      "sulfur",    0.521114256955661,     "PLS3", "dork"
154    )
155  expect_equal(tidy_post, exp_post, tolerance = 0.01)
156})
157
158## -----------------------------------------------------------------------------
159
160test_that('print method', {
161  skip_if_not_installed("mixOmics")
162  rec <- recipe(HHV ~ ., data = biom_tr) %>%
163    step_pls(all_predictors(), outcome = "HHV", num_comp = 3, id = "dork")
164
165  expect_output(print(rec), "feature extraction with all_predictors")
166
167  rec <- prep(rec)
168  expect_output(
169    print(rec),
170    "feature extraction with carbon, hydrogen, oxygen, nitrogen, sulfur"
171  )
172
173})
174
175test_that('keep_original_cols works', {
176
177  skip_if_not_installed("mixOmics")
178  pls_rec <- recipe(HHV ~ ., data = biom_tr) %>%
179    step_pls(all_predictors(), outcome = "HHV", num_comp = 3, keep_original_cols = TRUE)
180
181  pls_trained <- prep(pls_rec)
182  pls_pred <- bake(pls_trained, new_data = biom_te, all_predictors())
183
184  expect_equal(
185    colnames(pls_pred),
186    c("carbon", "hydrogen", "oxygen", "nitrogen", "sulfur",
187      "PLS1", "PLS2", "PLS3")
188  )
189})
190
191test_that('can prep recipes with no keep_original_cols', {
192  skip_if_not_installed("mixOmics")
193  pls_rec <- recipe(HHV ~ ., data = biom_tr) %>%
194    step_pls(all_predictors(), outcome = "HHV", num_comp = 3)
195
196  pls_rec$steps[[1]]$keep_original_cols <- NULL
197
198  expect_warning(
199    pls_trained <- prep(pls_rec, training = biom_tr, verbose = FALSE),
200    "'keep_original_cols' was added to"
201  )
202
203  expect_error(
204    pls_pred <- bake(pls_trained, new_data = biom_te, all_predictors()),
205    NA
206  )
207
208})
209