1library(testthat)
2library(recipes)
3library(tibble)
4
5library(modeldata)
6data(biomass)
7
8test_that('default method', {
9  rec <- recipe(x = biomass)
10  exp_res <- tibble(variable = colnames(biomass),
11                    type = rep(c("nominal", "numeric"), c(2, 6)),
12                    role = NA,
13                    source = "original")
14  expect_equal(summary(rec, TRUE), exp_res)
15})
16
17test_that('changing roles', {
18  rec <- recipe(x = biomass)
19  rec <- update_role(rec, sample, new_role = "some other role")
20  exp_res <- tibble(variable = colnames(biomass),
21                    type = rep(c("nominal", "numeric"), c(2, 6)),
22                    role = rep(c("some other role", NA), c(1, 7)),
23                    source = "original")
24  expect_equal(summary(rec, TRUE), exp_res)
25})
26
27test_that('change existing role', {
28  rec <- recipe(x = biomass)
29
30  expect_error(add_role(rec, sample, new_role = "some other role"))
31
32  rec <- update_role(rec, sample, new_role = "some other role")
33  rec <- update_role(rec, sample, new_role = "other other role")
34
35  exp_res <- tibble(variable = colnames(biomass),
36                    type = rep(c("nominal", "numeric"), c(2, 6)),
37                    role = rep(c("other other role", NA), c(1, 7)),
38                    source = "original")
39  expect_equal(summary(rec, TRUE), exp_res)
40})
41
42test_that('change only 1 role of variable with multiple roles', {
43  rec <- recipe(x = biomass)
44  rec <-
45    rec %>%
46    update_role(sample, new_role = "role 1") %>%
47    add_role(sample, new_role = "role 2")
48
49  orig_roles <- rec
50
51  # changes only 1
52  rec <- update_role(rec, sample, new_role = "role 3", old_role = "role 1")
53
54  exp_res <- summary(orig_roles)
55  exp_res$role[exp_res$role == "role 1"] <- "role 3"
56  expect_equal(summary(rec, TRUE), exp_res)
57})
58
59test_that('change every role of 2 variables', {
60  rec <- recipe(x = biomass)
61  rec <- update_role(rec, sample, dataset, new_role = "role 1")
62  orig_roles <- summary(rec)
63  rec <- update_role(rec, sample, dataset, new_role = "role 2")
64
65  exp_res <- orig_roles
66  exp_res$role[exp_res$role == "role 1"] <- "role 2"
67
68  expect_equal(summary(rec, TRUE), exp_res)
69})
70
71test_that('update only NA role', {
72  rec <- recipe(x = biomass)
73  orig_rec <- summary(rec)
74  rec <- update_role(rec, sample, dataset, new_role = "some other role")
75
76  exp_res <- orig_rec %>% arrange(variable)
77  exp_res$role[exp_res$variable %in% c("sample", "dataset")] <- "some other role"
78
79  expect_equal(summary(rec, TRUE) %>% arrange(variable), exp_res)
80})
81
82test_that('new role for existing NA role', {
83
84  rec <- recipe(x = biomass)
85  rec <- update_role(rec, sample, new_role = "some other role")
86
87  exp_res <- tibble(variable = colnames(biomass),
88                    type = rep(c("nominal", "numeric"), c(2, 6)),
89                    role = rep(c("some other role", NA), c(1, length(colnames(biomass)) - 1)),
90                    source = "original")
91  expect_equal(summary(rec, TRUE), exp_res)
92})
93
94test_that('new role with specified type', {
95
96  rec <- recipe(x = biomass)
97  rec <- update_role(rec, sample, new_role = "blah")
98  rec <- add_role(rec, sample, new_role = "some other role", new_type = "new type")
99
100  exp_res <- tibble(variable = c("sample", colnames(biomass)),
101                    type = c("nominal", "new type", "nominal", rep("numeric", 6)),
102                    role = rep(c("blah", "some other role", NA), c(1, 1, 7)),
103                    source = "original")
104  expect_equal(summary(rec, TRUE), exp_res)
105})
106
107test_that('add new role when two already exist with different types', {
108
109  # type of the first existing role found is used
110  rec <- recipe(x = biomass)
111  rec <- update_role(rec, sample, new_role = "blah")
112  rec <- add_role(rec, sample, new_role = "some other role", new_type = "new type")
113  rec <- add_role(rec, sample, new_role = "another role")
114
115  exp_res <- tibble(variable = c("sample", "sample", colnames(biomass)),
116                    type = c("nominal", "new type", "nominal", "nominal", rep("numeric", 6)),
117                    role = c("blah", "some other role", "another role", rep(NA, 7)),
118                    source = "original")
119  expect_equal(summary(rec, TRUE), exp_res)
120})
121
122test_that('existing role is skipped', {
123
124  rec <- recipe(x = biomass)
125  rec <- update_role(rec, sample, new_role = "blah")
126  rec <- add_role(rec, sample, new_role = "some other role")
127
128  # skip me
129  expect_warning(
130    rec <- add_role(rec, sample, new_role = "some other role"),
131    "Role, 'some other role', already exists"
132  )
133
134  # also tests the order, new roles come directly after old ones
135  exp_res <- tibble(variable = c("sample", colnames(biomass)),
136                    type = rep(c("nominal", "numeric"), c(3, 6)),
137                    role = rep(c("blah", "some other role", NA), c(1, 1, 7)),
138                    source = "original")
139  expect_equal(summary(rec, TRUE), exp_res)
140
141})
142
143test_that('existing role is skipped, but new one is added', {
144
145  rec <- recipe(x = biomass)
146  rec <- update_role(rec, sample, new_role = "blah")
147  rec <- add_role(rec, sample, new_role = "some other role")
148
149  # partially skip me
150  expect_warning(
151    rec <- add_role(rec, sample, dataset, new_role = "some other role"),
152    "Role, 'some other role', already exists"
153  )
154
155  exp_res <- tibble(variable = c(
156                      rep(c("sample", "dataset"), c(2,2)),
157                      setdiff(colnames(biomass), c("sample", "dataset"))
158                    ),
159                    type = rep(c("nominal", "numeric"), c(4, 6)),
160                    role = c("blah", "some other role", NA, "some other role", rep(NA, 6)),
161                    source = "original")
162  expect_equal(summary(rec, TRUE), exp_res)
163})
164
165test_that('cannot add roles if the current one is `NA`', {
166  rec <- recipe(x = biomass)
167  expect_error(add_role(rec, sample, sulfur), "No role currently exists")
168})
169
170test_that("`update_role()` cannot be ambiguous", {
171  rec <- recipe(HHV ~ ., data = biomass)
172  rec <- add_role(rec, sample, new_role = "x")
173
174  expect_error(
175    update_role(rec, sample, new_role = "y"),
176    "`old_role` can only be `NULL` when"
177  )
178})
179
180test_that("`new_role` cannot be `NA_character_`", {
181  rec <- recipe(x = biomass)
182
183  expect_error(
184    add_role(rec, sample, new_role = NA_character_),
185    "`new_role` must not be `NA`."
186  )
187
188  expect_error(
189    update_role(rec, sample, new_role = NA_character_),
190    "`new_role` must not be `NA`."
191  )
192})
193
194test_that('remove roles', {
195
196  rec <- recipe(x = biomass)
197  rec <- update_role(rec, sample, new_role = "role1")
198  expect_error(
199    rec <- remove_role(rec, sample, old_role = NA)
200  )
201  expect_error(
202    rec <- remove_role(rec, sample)
203  )
204
205  expect_warning(
206    remove_role(rec, sample, old_role = "non-existant"),
207    "Column, 'sample', does not have role, 'non-existant'."
208  )
209
210  rec <- remove_role(rec, sample, old_role = "role1")
211
212  exp_res <- tibble(variable = colnames(biomass),
213                    type = rep(c("nominal", "numeric"), c(2, 6)),
214                    role = NA_character_,
215                    source = "original")
216  expect_equal(summary(rec, TRUE), exp_res)
217
218})
219
220test_that('New type for an existing role can be added', {
221
222  rec <- recipe(x = biomass)
223  rec <- update_role(rec, sample, new_role = "role1")
224  rec <- add_role(rec, sample, new_role = "role1", new_type = "text")
225
226  exp_res <- tibble(variable = c("sample", colnames(biomass)),
227                    type = c(c("nominal", "text", "nominal"), rep("numeric", 6)),
228                    role = c("role1", "role1", rep(NA, 7)),
229                    source = "original")
230  expect_equal(summary(rec, TRUE), exp_res)
231
232})
233
234test_that("can use tidyselect ops in role selection", {
235  rec <- recipe(mpg ~ ., mtcars) %>%
236    step_center(all_predictors())
237
238  # Swap "predictor" for "foo"
239  rec <- update_role(
240    rec,
241    starts_with("c") & !carb,
242    new_role = "foo",
243    old_role = "predictor"
244  )
245
246  expect_identical(
247    rec$term_info$role[rec$term_info$variable == "cyl"],
248    "foo"
249  )
250
251  # Add "predictor" back
252  rec <- add_role(
253    rec,
254    starts_with("c") & !carb,
255    new_role = "predictor"
256  )
257
258  expect_identical(
259    rec$term_info$role[rec$term_info$variable == "cyl"],
260    c("foo", "predictor")
261  )
262
263  # Remove "foo"
264  rec <- remove_role(
265    rec,
266    starts_with("c") & !carb,
267    old_role = "foo"
268  )
269
270  expect_identical(
271    rec$term_info$role[rec$term_info$variable == "cyl"],
272    "predictor"
273  )
274})
275
276
277test_that("empty dots and zero column selections return input with a warning", {
278  rec <- recipe(x = biomass)
279
280  expect_warning(
281    rec2 <- add_role(rec),
282    "No columns were selected in `add_role[(][)]`"
283  )
284  expect_identical(rec2, rec)
285
286  expect_warning(
287    rec2 <- update_role(rec),
288    "No columns were selected in `update_role[(][)]`"
289  )
290  expect_identical(rec2, rec)
291
292  expect_warning(
293    rec2 <- remove_role(rec, old_role = "foo"),
294    "No columns were selected in `remove_role[(][)]`"
295  )
296  expect_identical(rec2, rec)
297
298  expect_warning(
299    rec2 <- add_role(rec, starts_with("foobar")),
300    "No columns were selected in `add_role[(][)]`"
301  )
302  expect_identical(rec2, rec)
303
304  expect_warning(
305    rec2 <- update_role(rec, starts_with("foobar")),
306    "No columns were selected in `update_role[(][)]`"
307  )
308  expect_identical(rec2, rec)
309
310  expect_warning(
311    rec2 <- remove_role(rec, starts_with("foobar"), old_role = "foo"),
312    "No columns were selected in `remove_role[(][)]`"
313  )
314  expect_identical(rec2, rec)
315})
316
317test_that('bad args', {
318  expect_error(
319    recipe(x = biomass) %>%
320      add_role(carbon, new_role = letters[1:2]),
321    "`new_role` must have length 1."
322  )
323
324  expect_error(
325    recipe(x = biomass) %>%
326      add_role(carbon, new_role = "a", new_type = letters[1:2]),
327    "`new_type` must have length 1."
328  )
329
330  expect_error(
331    recipe(x = biomass) %>%
332      update_role(carbon, new_role = c("a", "b")),
333    "`new_role` must have length 1."
334  )
335
336  expect_error(
337    recipe(x = biomass) %>%
338      update_role(carbon, old_role = c("a", "b")),
339    "`old_role` must have length 1."
340  )
341
342})
343
344
345# ------------------------------------------------------------------------------
346# Multiples roles + Selection testing
347
348test_that("adding multiple roles/types does not duplicate prepped columns", {
349
350  rec <- recipe(HHV ~ ., data = biomass)
351
352  # second role
353  expect_equal(
354    rec %>%
355      add_role(carbon, new_role = "carb") %>%
356      prep(training = biomass) %>%
357      juice() %>%
358      ncol(),
359
360    8
361  )
362
363  # second type
364  expect_equal(
365    rec %>%
366      add_role(carbon, new_type = "carb") %>%
367      prep(training = biomass) %>%
368      juice() %>%
369      ncol(),
370
371    8
372  )
373
374})
375
376test_that("type selectors can be combined", {
377
378  rec <- recipe(HHV ~ ., data = biomass)
379
380  prepped <- rec %>%
381    add_role(carbon, new_role = "predictor", new_type = "carb") %>%
382    step_center(all_numeric(), -has_type("carb")) %>%
383    prep(training = biomass)
384
385  expect_equal(
386    names(prepped$steps[[1]]$means),
387    c("hydrogen", "oxygen", "nitrogen", "sulfur", "HHV")
388  )
389
390})
391
392test_that("step_rm() removes ALL mention of variables with that role", {
393
394  rec <- recipe(HHV ~ ., data = biomass)
395
396  rec_prepped <- rec %>%
397    add_role(carbon, new_role = "predictor", new_type = "carb") %>%
398    step_rm(has_type("carb")) %>%
399    prep(training = biomass) %>%
400    summary()
401
402  expect_false("carbon" %in% rec_prepped$variable)
403})
404
405# ------------------------------------------------------------------------------
406# Tests related to #296
407# https://github.com/tidymodels/recipes/issues/296
408
409test_that("Existing `NA` roles are not modified in prep() when new columns are generated", {
410
411  rec_dummy <- recipe(x = iris) %>%
412    update_role(Sepal.Length, new_role = "outcome") %>%
413    update_role(Species, new_role = "predictor") %>%
414    step_dummy(Species)
415
416  prepped_rec_dummy <- prep(rec_dummy, iris)
417
418  orig <- summary(rec_dummy)
419  new <- summary(prepped_rec_dummy)
420
421  # These should be identical except for the modified Species term
422  expect_equal(
423    filter(orig, !grepl("Species", variable)),
424    filter(new, !grepl("Species", variable))
425  )
426
427  expect_equal(
428    filter(new, grepl("Species", variable)),
429    tibble(
430      variable = c("Species_versicolor", "Species_virginica"),
431      type = rep("numeric", times = 2),
432      role = rep("predictor", times = 2),
433      source = rep("derived", times = 2)
434    )
435  )
436
437  # Juicing with all predictors should only give these two columns
438  expect_equal(
439    colnames(juice(prepped_rec_dummy, all_predictors())),
440    c("Species_versicolor", "Species_virginica")
441  )
442
443})
444
445
446test_that("Existing `NA` roles are not modified in prep() when multiple new columns are generated", {
447
448  rec <- recipe(x = iris) %>%
449    update_role(Sepal.Length, new_role = "outcome") %>%
450    update_role(Sepal.Width, new_role = "predictor") %>%
451    update_role(Species, new_role = "predictor") %>%
452    step_dummy(Species) %>%
453    step_bs(Sepal.Width)
454
455  prepped_rec <- prep(rec, iris)
456
457  orig <- summary(rec)
458  new <- summary(prepped_rec)
459
460  # These should be identical except for the
461  # modified Species and Sepal.Width terms
462  expect_equal(
463    filter(orig, !grepl("Species", variable), !grepl("Sepal.Width", variable)),
464    filter(new, !grepl("Species", variable), !grepl("Sepal.Width", variable))
465  )
466
467})
468
469test_that("Roles are correcly selected in bake", {
470
471  x <- tibble::tibble(
472    a = runif(10),
473    b = runif(10),
474    c = runif(10)
475  )
476
477  rec <- recipe(c ~ ., x) %>%
478    update_role(b, new_role = "id") %>%
479    add_role(a, new_role = "id") %>%
480    prep()
481
482  o <- recipes::bake(rec, x, recipes::has_role("id"))
483  expect_equal(names(o), c("a", "b"))
484
485})
486
487