1% Generated by roxygen2: do not edit by hand 2% Please edit documentation in R/YeoJohnson.R 3\name{step_YeoJohnson} 4\alias{step_YeoJohnson} 5\title{Yeo-Johnson Transformation} 6\usage{ 7step_YeoJohnson( 8 recipe, 9 ..., 10 role = NA, 11 trained = FALSE, 12 lambdas = NULL, 13 limits = c(-5, 5), 14 num_unique = 5, 15 na_rm = TRUE, 16 skip = FALSE, 17 id = rand_id("YeoJohnson") 18) 19} 20\arguments{ 21\item{recipe}{A recipe object. The step will be added to the 22sequence of operations for this recipe.} 23 24\item{...}{One or more selector functions to choose variables 25for this step. See \code{\link[=selections]{selections()}} for more details.} 26 27\item{role}{Not used by this step since no new variables are 28created.} 29 30\item{trained}{A logical to indicate if the quantities for 31preprocessing have been estimated.} 32 33\item{lambdas}{A numeric vector of transformation values. This 34is \code{NULL} until computed by \code{\link[=prep.recipe]{prep.recipe()}}.} 35 36\item{limits}{A length 2 numeric vector defining the range to 37compute the transformation parameter lambda.} 38 39\item{num_unique}{An integer where data that have fewer unique 40values than this number will not be evaluated for a transformation.} 41 42\item{na_rm}{A logical value indicating whether \code{NA} 43values should be removed during computations.} 44 45\item{skip}{A logical. Should the step be skipped when the 46recipe is baked by \code{\link[=bake.recipe]{bake.recipe()}}? While all operations are baked 47when \code{\link[=prep.recipe]{prep.recipe()}} is run, some operations may not be able to be 48conducted on new data (e.g. processing the outcome variable(s)). 49Care should be taken when using \code{skip = TRUE} as it may affect 50the computations for subsequent operations.} 51 52\item{id}{A character string that is unique to this step to identify it.} 53} 54\value{ 55An updated version of \code{recipe} with the new step added to the 56sequence of any existing operations. 
57} 58\description{ 59\code{step_YeoJohnson} creates a \emph{specification} of a 60recipe step that will transform data using a simple Yeo-Johnson 61transformation. 62} 63\details{ 64The Yeo-Johnson transformation is very similar to the 65Box-Cox but does not require the input variables to be strictly 66positive. In the package, the partial log-likelihood function is 67directly optimized within a reasonable set of transformation 68values (which can be changed by the user). 69 70This transformation is typically done on the outcome variable 71using the residuals for a statistical model (such as ordinary 72least squares). Here, a simple null model (intercept only) is 73used to apply the transformation to the \emph{predictor} 74variables individually. This can have the effect of making the 75variable distributions more symmetric. 76 77If the transformation parameters are estimated to be very 78close to the bounds, or if the optimization fails, a value of 79\code{NA} is used and no transformation is applied. 80 81When you \code{\link[=tidy]{tidy()}} this step, a tibble with columns \code{terms} (the 82selectors or variables selected) and \code{value} (the 83lambda estimate) is returned. 84} 85\examples{ 86 87library(modeldata) 88data(biomass) 89 90biomass_tr <- biomass[biomass$dataset == "Training",] 91biomass_te <- biomass[biomass$dataset == "Testing",] 92 93rec <- recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur, 94 data = biomass_tr) 95 96yj_transform <- step_YeoJohnson(rec, all_numeric()) 97 98yj_estimates <- prep(yj_transform, training = biomass_tr) 99 100yj_te <- bake(yj_estimates, biomass_te) 101 102plot(density(biomass_te$sulfur), main = "before") 103plot(density(yj_te$sulfur), main = "after") 104 105tidy(yj_transform, number = 1) 106tidy(yj_estimates, number = 1) 107} 108\references{ 109Yeo, I. K., and Johnson, R. A. (2000). A new family of power 110transformations to improve normality or symmetry. \emph{Biometrika}. 
111} 112\seealso{ 113Other individual transformation steps: 114\code{\link{step_BoxCox}()}, 115\code{\link{step_bs}()}, 116\code{\link{step_harmonic}()}, 117\code{\link{step_hyperbolic}()}, 118\code{\link{step_inverse}()}, 119\code{\link{step_invlogit}()}, 120\code{\link{step_logit}()}, 121\code{\link{step_log}()}, 122\code{\link{step_mutate}()}, 123\code{\link{step_ns}()}, 124\code{\link{step_poly}()}, 125\code{\link{step_relu}()}, 126\code{\link{step_sqrt}()} 127} 128\concept{individual transformation steps} 129