1#' Radial Basis Function Kernel PCA Signal Extraction 2#' 3#' `step_kpca_rbf` creates a *specification* of a recipe step that 4#' will convert numeric data into one or more principal components 5#' using a radial basis function kernel basis expansion. 6#' 7#' @inheritParams step_pca 8#' @inheritParams step_center 9#' @param sigma A numeric value for the radial basis function parameter. 10#' @param res An S4 [kernlab::kpca()] object is stored 11#' here once this preprocessing step has be trained by 12#' [`prep()`][prep.recipe()]. 13#' @template step-return 14#' @family multivariate transformation steps 15#' @export 16#' @template kpca-info 17#' 18#' @examples 19#' library(modeldata) 20#' data(biomass) 21#' 22#' biomass_tr <- biomass[biomass$dataset == "Training",] 23#' biomass_te <- biomass[biomass$dataset == "Testing",] 24#' 25#' rec <- recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur, 26#' data = biomass_tr) 27#' 28#' kpca_trans <- rec %>% 29#' step_YeoJohnson(all_numeric_predictors()) %>% 30#' step_normalize(all_numeric_predictors()) %>% 31#' step_kpca_rbf(all_numeric_predictors()) 32#' 33#' if (require(dimRed) & require(kernlab)) { 34#' kpca_estimates <- prep(kpca_trans, training = biomass_tr) 35#' 36#' kpca_te <- bake(kpca_estimates, biomass_te) 37#' 38#' rng <- extendrange(c(kpca_te$kPC1, kpca_te$kPC2)) 39#' plot(kpca_te$kPC1, kpca_te$kPC2, 40#' xlim = rng, ylim = rng) 41#' 42#' tidy(kpca_trans, number = 3) 43#' tidy(kpca_estimates, number = 3) 44#' } 45#' 46step_kpca_rbf <- 47 function(recipe, 48 ..., 49 role = "predictor", 50 trained = FALSE, 51 num_comp = 5, 52 res = NULL, 53 sigma = 0.2, 54 prefix = "kPC", 55 keep_original_cols = FALSE, 56 skip = FALSE, 57 id = rand_id("kpca_rbf")) { 58 59 recipes_pkg_check(required_pkgs.step_kpca_rbf()) 60 61 add_step( 62 recipe, 63 step_kpca_rbf_new( 64 terms = ellipse_check(...), 65 role = role, 66 trained = trained, 67 num_comp = num_comp, 68 res = res, 69 sigma = sigma, 70 prefix = prefix, 71 keep_original_cols = keep_original_cols, 72 skip = skip, 73 id = id 74 ) 75 ) 76 } 77 78step_kpca_rbf_new <- 79 function(terms, role, trained, num_comp, res, sigma, prefix, 80 keep_original_cols, skip, id) { 81 step( 82 subclass = "kpca_rbf", 83 terms = terms, 84 role = role, 85 trained = trained, 86 num_comp = num_comp, 87 res = res, 88 sigma = sigma, 89 prefix = prefix, 90 keep_original_cols = keep_original_cols, 91 skip = skip, 92 id = id 93 ) 94 } 95 96#' @export 97prep.step_kpca_rbf <- function(x, training, info = NULL, ...) { 98 col_names <- recipes_eval_select(x$terms, training, info) 99 check_type(training[, col_names]) 100 101 if (x$num_comp > 0) { 102 kprc <- 103 dimRed::kPCA( 104 stdpars = c( 105 list(ndim = x$num_comp), 106 list(kernel = "rbfdot", kpar = list(sigma = x$sigma) 107 ) 108 ) 109 ) 110 kprc <- 111 try( 112 suppressMessages({ 113 kprc@fun( 114 dimRed::dimRedData(as.data.frame(training[, col_names, drop = FALSE])), 115 kprc@stdpars 116 ) 117 }), 118 silent = TRUE 119 ) 120 121 if (inherits(kprc, "try-error")) { 122 rlang::abort(paste0("`step_kpca_rbf` failed with error:\n", 123 as.character(kprc))) 124 } 125 } else { 126 kprc <- list(x_vars = col_names) 127 } 128 129 step_kpca_rbf_new( 130 terms = x$terms, 131 role = x$role, 132 trained = TRUE, 133 num_comp = x$num_comp, 134 sigma = x$sigma, 135 res = kprc, 136 prefix = x$prefix, 137 keep_original_cols = get_keep_original_cols(x), 138 skip = x$skip, 139 id = x$id 140 ) 141} 142 143#' @export 144bake.step_kpca_rbf <- function(object, new_data, ...) { 145 if (object$num_comp > 0) { 146 pca_vars <- colnames(environment(object$res@apply)$indata) 147 comps <- object$res@apply( 148 dimRed::dimRedData(as.data.frame(new_data[, pca_vars, drop = FALSE])) 149 )@data 150 comps <- comps[, 1:object$num_comp, drop = FALSE] 151 comps <- check_name(comps, new_data, object) 152 new_data <- bind_cols(new_data, as_tibble(comps)) 153 keep_original_cols <- get_keep_original_cols(object) 154 155 if (!keep_original_cols) { 156 new_data <- new_data[, !(colnames(new_data) %in% pca_vars), drop = FALSE] 157 } 158 } 159 as_tibble(new_data) 160} 161 162print.step_kpca_rbf <- function(x, width = max(20, options()$width - 40), ...) { 163 if (x$trained) { 164 if (x$num_comp == 0) { 165 cat("No kPCA components were extracted.\n") 166 } else { 167 cat("RBF kernel PCA (", x$res@pars$kernel, ") extraction with ", sep = "") 168 cat(format_ch_vec(colnames(x$res@org.data), width = width)) 169 } 170 } else { 171 cat("RBF kernel PCA extraction with ", sep = "") 172 cat(format_selectors(x$terms, width = width)) 173 } 174 if (x$trained) cat(" [trained]\n") else cat("\n") 175 invisible(x) 176} 177 178 179#' @rdname tidy.recipe 180#' @export 181tidy.step_kpca_rbf <- function(x, ...) { 182 if (is_trained(x)) { 183 if (x$num_comp > 0) { 184 res <- tibble(terms = colnames(x$res@org.data)) 185 } else { 186 res <- tibble(terms = unname(x$res$x_vars)) 187 } 188 } else { 189 term_names <- sel2char(x$terms) 190 res <- tibble(terms = term_names) 191 } 192 res$id <- x$id 193 res 194} 195 196 197#' @rdname tunable.recipe 198#' @export 199tunable.step_kpca_rbf <- function(x, ...) { 200 tibble::tibble( 201 name = c("num_comp", "sigma"), 202 call_info = list( 203 list(pkg = "dials", fun = "num_comp", range = c(1L, 4L)), 204 list(pkg = "dials", fun = "rbf_sigma") 205 ), 206 source = "recipe", 207 component = "step_kpca_rbf", 208 component_id = x$id 209 ) 210} 211 212#' @rdname required_pkgs.recipe 213#' @export 214required_pkgs.step_kpca_rbf <- function(x, ...) { 215 c("dimRed", "kernlab") 216} 217