1#' Radial Basis Function Kernel PCA Signal Extraction
2#'
3#' `step_kpca_rbf` creates a *specification* of a recipe step that
4#'  will convert numeric data into one or more principal components
5#'  using a radial basis function kernel basis expansion.
6#'
7#' @inheritParams step_pca
8#' @inheritParams step_center
9#' @param sigma A numeric value for the radial basis function parameter.
10#' @param res An S4 [kernlab::kpca()] object is stored
11#'  here once this preprocessing step has be trained by
12#'  [`prep()`][prep.recipe()].
13#' @template step-return
14#' @family multivariate transformation steps
15#' @export
16#' @template kpca-info
17#'
18#' @examples
19#' library(modeldata)
20#' data(biomass)
21#'
22#' biomass_tr <- biomass[biomass$dataset == "Training",]
23#' biomass_te <- biomass[biomass$dataset == "Testing",]
24#'
25#' rec <- recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
26#'               data = biomass_tr)
27#'
28#' kpca_trans <- rec %>%
29#'   step_YeoJohnson(all_numeric_predictors()) %>%
30#'   step_normalize(all_numeric_predictors()) %>%
31#'   step_kpca_rbf(all_numeric_predictors())
32#'
33#' if (require(dimRed) & require(kernlab)) {
34#'   kpca_estimates <- prep(kpca_trans, training = biomass_tr)
35#'
36#'   kpca_te <- bake(kpca_estimates, biomass_te)
37#'
38#'   rng <- extendrange(c(kpca_te$kPC1, kpca_te$kPC2))
39#'   plot(kpca_te$kPC1, kpca_te$kPC2,
40#'        xlim = rng, ylim = rng)
41#'
42#'   tidy(kpca_trans, number = 3)
43#'   tidy(kpca_estimates, number = 3)
44#' }
45#'
46step_kpca_rbf <-
47  function(recipe,
48           ...,
49           role = "predictor",
50           trained = FALSE,
51           num_comp = 5,
52           res = NULL,
53           sigma = 0.2,
54           prefix = "kPC",
55           keep_original_cols = FALSE,
56           skip = FALSE,
57           id = rand_id("kpca_rbf")) {
58
59    recipes_pkg_check(required_pkgs.step_kpca_rbf())
60
61    add_step(
62      recipe,
63      step_kpca_rbf_new(
64        terms = ellipse_check(...),
65        role = role,
66        trained = trained,
67        num_comp = num_comp,
68        res = res,
69        sigma = sigma,
70        prefix = prefix,
71        keep_original_cols = keep_original_cols,
72        skip = skip,
73        id = id
74      )
75    )
76  }
77
78step_kpca_rbf_new <-
79  function(terms, role, trained, num_comp, res, sigma, prefix,
80           keep_original_cols, skip, id) {
81    step(
82      subclass = "kpca_rbf",
83      terms = terms,
84      role = role,
85      trained = trained,
86      num_comp = num_comp,
87      res = res,
88      sigma = sigma,
89      prefix = prefix,
90      keep_original_cols = keep_original_cols,
91      skip = skip,
92      id = id
93    )
94  }
95
96#' @export
97prep.step_kpca_rbf <- function(x, training, info = NULL, ...) {
98  col_names <- recipes_eval_select(x$terms, training, info)
99  check_type(training[, col_names])
100
101  if (x$num_comp > 0) {
102    kprc <-
103      dimRed::kPCA(
104        stdpars = c(
105          list(ndim = x$num_comp),
106          list(kernel = "rbfdot", kpar = list(sigma = x$sigma)
107          )
108        )
109      )
110    kprc <-
111      try(
112        suppressMessages({
113          kprc@fun(
114            dimRed::dimRedData(as.data.frame(training[, col_names, drop = FALSE])),
115            kprc@stdpars
116          )
117        }),
118        silent =  TRUE
119      )
120
121    if (inherits(kprc, "try-error")) {
122      rlang::abort(paste0("`step_kpca_rbf` failed with error:\n",
123                          as.character(kprc)))
124    }
125  } else {
126    kprc <- list(x_vars = col_names)
127  }
128
129  step_kpca_rbf_new(
130    terms = x$terms,
131    role = x$role,
132    trained = TRUE,
133    num_comp = x$num_comp,
134    sigma = x$sigma,
135    res = kprc,
136    prefix = x$prefix,
137    keep_original_cols = get_keep_original_cols(x),
138    skip = x$skip,
139    id = x$id
140  )
141}
142
143#' @export
144bake.step_kpca_rbf <- function(object, new_data, ...) {
145  if (object$num_comp > 0) {
146    pca_vars <- colnames(environment(object$res@apply)$indata)
147    comps <- object$res@apply(
148      dimRed::dimRedData(as.data.frame(new_data[, pca_vars, drop = FALSE]))
149    )@data
150    comps <- comps[, 1:object$num_comp, drop = FALSE]
151    comps <- check_name(comps, new_data, object)
152    new_data <- bind_cols(new_data, as_tibble(comps))
153    keep_original_cols <- get_keep_original_cols(object)
154
155    if (!keep_original_cols) {
156      new_data <- new_data[, !(colnames(new_data) %in% pca_vars), drop = FALSE]
157    }
158  }
159  as_tibble(new_data)
160}
161
162print.step_kpca_rbf <- function(x, width = max(20, options()$width - 40), ...) {
163  if (x$trained) {
164    if (x$num_comp == 0) {
165      cat("No kPCA components were extracted.\n")
166    } else {
167      cat("RBF kernel PCA (", x$res@pars$kernel, ") extraction with ", sep = "")
168      cat(format_ch_vec(colnames(x$res@org.data), width = width))
169    }
170  } else {
171    cat("RBF kernel PCA extraction with ", sep = "")
172    cat(format_selectors(x$terms, width = width))
173  }
174  if (x$trained) cat(" [trained]\n") else cat("\n")
175  invisible(x)
176}
177
178
179#' @rdname tidy.recipe
180#' @export
181tidy.step_kpca_rbf <- function(x, ...) {
182  if (is_trained(x)) {
183    if (x$num_comp > 0) {
184      res <- tibble(terms = colnames(x$res@org.data))
185    } else {
186      res <- tibble(terms = unname(x$res$x_vars))
187    }
188  } else {
189    term_names <- sel2char(x$terms)
190    res <- tibble(terms = term_names)
191  }
192  res$id <- x$id
193  res
194}
195
196
197#' @rdname tunable.recipe
198#' @export
199tunable.step_kpca_rbf <- function(x, ...) {
200  tibble::tibble(
201    name = c("num_comp", "sigma"),
202    call_info = list(
203      list(pkg = "dials", fun = "num_comp", range = c(1L, 4L)),
204      list(pkg = "dials", fun = "rbf_sigma")
205    ),
206    source = "recipe",
207    component = "step_kpca_rbf",
208    component_id = x$id
209  )
210}
211
212#' @rdname required_pkgs.recipe
213#' @export
214required_pkgs.step_kpca_rbf <- function(x, ...) {
215  c("dimRed", "kernlab")
216}
217