1library(testthat) 2library(recipes) 3library(tibble) 4 5library(modeldata) 6data(biomass) 7 8test_that('default method', { 9 rec <- recipe(x = biomass) 10 exp_res <- tibble(variable = colnames(biomass), 11 type = rep(c("nominal", "numeric"), c(2, 6)), 12 role = NA, 13 source = "original") 14 expect_equal(summary(rec, TRUE), exp_res) 15}) 16 17test_that('changing roles', { 18 rec <- recipe(x = biomass) 19 rec <- update_role(rec, sample, new_role = "some other role") 20 exp_res <- tibble(variable = colnames(biomass), 21 type = rep(c("nominal", "numeric"), c(2, 6)), 22 role = rep(c("some other role", NA), c(1, 7)), 23 source = "original") 24 expect_equal(summary(rec, TRUE), exp_res) 25}) 26 27test_that('change existing role', { 28 rec <- recipe(x = biomass) 29 30 expect_error(add_role(rec, sample, new_role = "some other role")) 31 32 rec <- update_role(rec, sample, new_role = "some other role") 33 rec <- update_role(rec, sample, new_role = "other other role") 34 35 exp_res <- tibble(variable = colnames(biomass), 36 type = rep(c("nominal", "numeric"), c(2, 6)), 37 role = rep(c("other other role", NA), c(1, 7)), 38 source = "original") 39 expect_equal(summary(rec, TRUE), exp_res) 40}) 41 42test_that('change only 1 role of variable with multiple roles', { 43 rec <- recipe(x = biomass) 44 rec <- 45 rec %>% 46 update_role(sample, new_role = "role 1") %>% 47 add_role(sample, new_role = "role 2") 48 49 orig_roles <- rec 50 51 # changes only 1 52 rec <- update_role(rec, sample, new_role = "role 3", old_role = "role 1") 53 54 exp_res <- summary(orig_roles) 55 exp_res$role[exp_res$role == "role 1"] <- "role 3" 56 expect_equal(summary(rec, TRUE), exp_res) 57}) 58 59test_that('change every role of 2 variables', { 60 rec <- recipe(x = biomass) 61 rec <- update_role(rec, sample, dataset, new_role = "role 1") 62 orig_roles <- summary(rec) 63 rec <- update_role(rec, sample, dataset, new_role = "role 2") 64 65 exp_res <- orig_roles 66 exp_res$role[exp_res$role == "role 1"] <- "role 2" 67 68 expect_equal(summary(rec, TRUE), exp_res) 69}) 70 71test_that('update only NA role', { 72 rec <- recipe(x = biomass) 73 orig_rec <- summary(rec) 74 rec <- update_role(rec, sample, dataset, new_role = "some other role") 75 76 exp_res <- orig_rec %>% arrange(variable) 77 exp_res$role[exp_res$variable %in% c("sample", "dataset")] <- "some other role" 78 79 expect_equal(summary(rec, TRUE) %>% arrange(variable), exp_res) 80}) 81 82test_that('new role for existing NA role', { 83 84 rec <- recipe(x = biomass) 85 rec <- update_role(rec, sample, new_role = "some other role") 86 87 exp_res <- tibble(variable = colnames(biomass), 88 type = rep(c("nominal", "numeric"), c(2, 6)), 89 role = rep(c("some other role", NA), c(1, length(colnames(biomass)) - 1)), 90 source = "original") 91 expect_equal(summary(rec, TRUE), exp_res) 92}) 93 94test_that('new role with specified type', { 95 96 rec <- recipe(x = biomass) 97 rec <- update_role(rec, sample, new_role = "blah") 98 rec <- add_role(rec, sample, new_role = "some other role", new_type = "new type") 99 100 exp_res <- tibble(variable = c("sample", colnames(biomass)), 101 type = c("nominal", "new type", "nominal", rep("numeric", 6)), 102 role = rep(c("blah", "some other role", NA), c(1, 1, 7)), 103 source = "original") 104 expect_equal(summary(rec, TRUE), exp_res) 105}) 106 107test_that('add new role when two already exist with different types', { 108 109 # type of the first existing role found is used 110 rec <- recipe(x = biomass) 111 rec <- update_role(rec, sample, new_role = "blah") 112 rec <- add_role(rec, sample, new_role = "some other role", new_type = "new type") 113 rec <- add_role(rec, sample, new_role = "another role") 114 115 exp_res <- tibble(variable = c("sample", "sample", colnames(biomass)), 116 type = c("nominal", "new type", "nominal", "nominal", rep("numeric", 6)), 117 role = c("blah", "some other role", "another role", rep(NA, 7)), 118 source = "original") 119 expect_equal(summary(rec, TRUE), exp_res) 120}) 121 122test_that('existing role is skipped', { 123 124 rec <- recipe(x = biomass) 125 rec <- update_role(rec, sample, new_role = "blah") 126 rec <- add_role(rec, sample, new_role = "some other role") 127 128 # skip me 129 expect_warning( 130 rec <- add_role(rec, sample, new_role = "some other role"), 131 "Role, 'some other role', already exists" 132 ) 133 134 # also tests the order, new roles come directly after old ones 135 exp_res <- tibble(variable = c("sample", colnames(biomass)), 136 type = rep(c("nominal", "numeric"), c(3, 6)), 137 role = rep(c("blah", "some other role", NA), c(1, 1, 7)), 138 source = "original") 139 expect_equal(summary(rec, TRUE), exp_res) 140 141}) 142 143test_that('existing role is skipped, but new one is added', { 144 145 rec <- recipe(x = biomass) 146 rec <- update_role(rec, sample, new_role = "blah") 147 rec <- add_role(rec, sample, new_role = "some other role") 148 149 # partially skip me 150 expect_warning( 151 rec <- add_role(rec, sample, dataset, new_role = "some other role"), 152 "Role, 'some other role', already exists" 153 ) 154 155 exp_res <- tibble(variable = c( 156 rep(c("sample", "dataset"), c(2,2)), 157 setdiff(colnames(biomass), c("sample", "dataset")) 158 ), 159 type = rep(c("nominal", "numeric"), c(4, 6)), 160 role = c("blah", "some other role", NA, "some other role", rep(NA, 6)), 161 source = "original") 162 expect_equal(summary(rec, TRUE), exp_res) 163}) 164 165test_that('cannot add roles if the current one is `NA`', { 166 rec <- recipe(x = biomass) 167 expect_error(add_role(rec, sample, sulfur), "No role currently exists") 168}) 169 170test_that("`update_role()` cannot be ambiguous", { 171 rec <- recipe(HHV ~ ., data = biomass) 172 rec <- add_role(rec, sample, new_role = "x") 173 174 expect_error( 175 update_role(rec, sample, new_role = "y"), 176 "`old_role` can only be `NULL` when" 177 ) 178}) 179 180test_that("`new_role` cannot be `NA_character_`", { 181 rec <- recipe(x = biomass) 182 183 expect_error( 184 add_role(rec, sample, new_role = NA_character_), 185 "`new_role` must not be `NA`." 186 ) 187 188 expect_error( 189 update_role(rec, sample, new_role = NA_character_), 190 "`new_role` must not be `NA`." 191 ) 192}) 193 194test_that('remove roles', { 195 196 rec <- recipe(x = biomass) 197 rec <- update_role(rec, sample, new_role = "role1") 198 expect_error( 199 rec <- remove_role(rec, sample, old_role = NA) 200 ) 201 expect_error( 202 rec <- remove_role(rec, sample) 203 ) 204 205 expect_warning( 206 remove_role(rec, sample, old_role = "non-existant"), 207 "Column, 'sample', does not have role, 'non-existant'." 208 ) 209 210 rec <- remove_role(rec, sample, old_role = "role1") 211 212 exp_res <- tibble(variable = colnames(biomass), 213 type = rep(c("nominal", "numeric"), c(2, 6)), 214 role = NA_character_, 215 source = "original") 216 expect_equal(summary(rec, TRUE), exp_res) 217 218}) 219 220test_that('New type for an existing role can be added', { 221 222 rec <- recipe(x = biomass) 223 rec <- update_role(rec, sample, new_role = "role1") 224 rec <- add_role(rec, sample, new_role = "role1", new_type = "text") 225 226 exp_res <- tibble(variable = c("sample", colnames(biomass)), 227 type = c(c("nominal", "text", "nominal"), rep("numeric", 6)), 228 role = c("role1", "role1", rep(NA, 7)), 229 source = "original") 230 expect_equal(summary(rec, TRUE), exp_res) 231 232}) 233 234test_that("can use tidyselect ops in role selection", { 235 rec <- recipe(mpg ~ ., mtcars) %>% 236 step_center(all_predictors()) 237 238 # Swap "predictor" for "foo" 239 rec <- update_role( 240 rec, 241 starts_with("c") & !carb, 242 new_role = "foo", 243 old_role = "predictor" 244 ) 245 246 expect_identical( 247 rec$term_info$role[rec$term_info$variable == "cyl"], 248 "foo" 249 ) 250 251 # Add "predictor" back 252 rec <- add_role( 253 rec, 254 starts_with("c") & !carb, 255 new_role = "predictor" 256 ) 257 258 expect_identical( 259 rec$term_info$role[rec$term_info$variable == "cyl"], 260 c("foo", "predictor") 261 ) 262 263 # Remove "foo" 264 rec <- remove_role( 265 rec, 266 starts_with("c") & !carb, 267 old_role = "foo" 268 ) 269 270 expect_identical( 271 rec$term_info$role[rec$term_info$variable == "cyl"], 272 "predictor" 273 ) 274}) 275 276 277test_that("empty dots and zero column selections return input with a warning", { 278 rec <- recipe(x = biomass) 279 280 expect_warning( 281 rec2 <- add_role(rec), 282 "No columns were selected in `add_role[(][)]`" 283 ) 284 expect_identical(rec2, rec) 285 286 expect_warning( 287 rec2 <- update_role(rec), 288 "No columns were selected in `update_role[(][)]`" 289 ) 290 expect_identical(rec2, rec) 291 292 expect_warning( 293 rec2 <- remove_role(rec, old_role = "foo"), 294 "No columns were selected in `remove_role[(][)]`" 295 ) 296 expect_identical(rec2, rec) 297 298 expect_warning( 299 rec2 <- add_role(rec, starts_with("foobar")), 300 "No columns were selected in `add_role[(][)]`" 301 ) 302 expect_identical(rec2, rec) 303 304 expect_warning( 305 rec2 <- update_role(rec, starts_with("foobar")), 306 "No columns were selected in `update_role[(][)]`" 307 ) 308 expect_identical(rec2, rec) 309 310 expect_warning( 311 rec2 <- remove_role(rec, starts_with("foobar"), old_role = "foo"), 312 "No columns were selected in `remove_role[(][)]`" 313 ) 314 expect_identical(rec2, rec) 315}) 316 317test_that('bad args', { 318 expect_error( 319 recipe(x = biomass) %>% 320 add_role(carbon, new_role = letters[1:2]), 321 "`new_role` must have length 1." 322 ) 323 324 expect_error( 325 recipe(x = biomass) %>% 326 add_role(carbon, new_role = "a", new_type = letters[1:2]), 327 "`new_type` must have length 1." 328 ) 329 330 expect_error( 331 recipe(x = biomass) %>% 332 update_role(carbon, new_role = c("a", "b")), 333 "`new_role` must have length 1." 334 ) 335 336 expect_error( 337 recipe(x = biomass) %>% 338 update_role(carbon, old_role = c("a", "b")), 339 "`old_role` must have length 1." 340 ) 341 342}) 343 344 345# ------------------------------------------------------------------------------ 346# Multiples roles + Selection testing 347 348test_that("adding multiple roles/types does not duplicate prepped columns", { 349 350 rec <- recipe(HHV ~ ., data = biomass) 351 352 # second role 353 expect_equal( 354 rec %>% 355 add_role(carbon, new_role = "carb") %>% 356 prep(training = biomass) %>% 357 juice() %>% 358 ncol(), 359 360 8 361 ) 362 363 # second type 364 expect_equal( 365 rec %>% 366 add_role(carbon, new_type = "carb") %>% 367 prep(training = biomass) %>% 368 juice() %>% 369 ncol(), 370 371 8 372 ) 373 374}) 375 376test_that("type selectors can be combined", { 377 378 rec <- recipe(HHV ~ ., data = biomass) 379 380 prepped <- rec %>% 381 add_role(carbon, new_role = "predictor", new_type = "carb") %>% 382 step_center(all_numeric(), -has_type("carb")) %>% 383 prep(training = biomass) 384 385 expect_equal( 386 names(prepped$steps[[1]]$means), 387 c("hydrogen", "oxygen", "nitrogen", "sulfur", "HHV") 388 ) 389 390}) 391 392test_that("step_rm() removes ALL mention of variables with that role", { 393 394 rec <- recipe(HHV ~ ., data = biomass) 395 396 rec_prepped <- rec %>% 397 add_role(carbon, new_role = "predictor", new_type = "carb") %>% 398 step_rm(has_type("carb")) %>% 399 prep(training = biomass) %>% 400 summary() 401 402 expect_false("carbon" %in% rec_prepped$variable) 403}) 404 405# ------------------------------------------------------------------------------ 406# Tests related to #296 407# https://github.com/tidymodels/recipes/issues/296 408 409test_that("Existing `NA` roles are not modified in prep() when new columns are generated", { 410 411 rec_dummy <- recipe(x = iris) %>% 412 update_role(Sepal.Length, new_role = "outcome") %>% 413 update_role(Species, new_role = "predictor") %>% 414 step_dummy(Species) 415 416 prepped_rec_dummy <- prep(rec_dummy, iris) 417 418 orig <- summary(rec_dummy) 419 new <- summary(prepped_rec_dummy) 420 421 # These should be identical except for the modified Species term 422 expect_equal( 423 filter(orig, !grepl("Species", variable)), 424 filter(new, !grepl("Species", variable)) 425 ) 426 427 expect_equal( 428 filter(new, grepl("Species", variable)), 429 tibble( 430 variable = c("Species_versicolor", "Species_virginica"), 431 type = rep("numeric", times = 2), 432 role = rep("predictor", times = 2), 433 source = rep("derived", times = 2) 434 ) 435 ) 436 437 # Juicing with all predictors should only give these two columns 438 expect_equal( 439 colnames(juice(prepped_rec_dummy, all_predictors())), 440 c("Species_versicolor", "Species_virginica") 441 ) 442 443}) 444 445 446test_that("Existing `NA` roles are not modified in prep() when multiple new columns are generated", { 447 448 rec <- recipe(x = iris) %>% 449 update_role(Sepal.Length, new_role = "outcome") %>% 450 update_role(Sepal.Width, new_role = "predictor") %>% 451 update_role(Species, new_role = "predictor") %>% 452 step_dummy(Species) %>% 453 step_bs(Sepal.Width) 454 455 prepped_rec <- prep(rec, iris) 456 457 orig <- summary(rec) 458 new <- summary(prepped_rec) 459 460 # These should be identical except for the 461 # modified Species and Sepal.Width terms 462 expect_equal( 463 filter(orig, !grepl("Species", variable), !grepl("Sepal.Width", variable)), 464 filter(new, !grepl("Species", variable), !grepl("Sepal.Width", variable)) 465 ) 466 467}) 468 469test_that("Roles are correcly selected in bake", { 470 471 x <- tibble::tibble( 472 a = runif(10), 473 b = runif(10), 474 c = runif(10) 475 ) 476 477 rec <- recipe(c ~ ., x) %>% 478 update_role(b, new_role = "id") %>% 479 add_role(a, new_role = "id") %>% 480 prep() 481 482 o <- recipes::bake(rec, x, recipes::has_role("id")) 483 expect_equal(names(o), c("a", "b")) 484 485}) 486 487