1% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/YeoJohnson.R
3\name{step_YeoJohnson}
4\alias{step_YeoJohnson}
5\title{Yeo-Johnson Transformation}
6\usage{
7step_YeoJohnson(
8  recipe,
9  ...,
10  role = NA,
11  trained = FALSE,
12  lambdas = NULL,
13  limits = c(-5, 5),
14  num_unique = 5,
15  na_rm = TRUE,
16  skip = FALSE,
17  id = rand_id("YeoJohnson")
18)
19}
20\arguments{
21\item{recipe}{A recipe object. The step will be added to the
22sequence of operations for this recipe.}
23
24\item{...}{One or more selector functions to choose variables
25for this step. See \code{\link[=selections]{selections()}} for more details.}
26
27\item{role}{Not used by this step since no new variables are
28created.}
29
30\item{trained}{A logical to indicate if the quantities for
31preprocessing have been estimated.}
32
33\item{lambdas}{A numeric vector of transformation values. This
34is \code{NULL} until computed by \code{\link[=prep.recipe]{prep.recipe()}}.}
35
\item{limits}{A length 2 numeric vector defining the range over
which the transformation parameter lambda is searched.}
38
\item{num_unique}{An integer where data that have fewer unique
values than this will not be evaluated for a transformation.}
41
42\item{na_rm}{A logical value indicating whether \code{NA}
43values should be removed during computations.}
44
45\item{skip}{A logical. Should the step be skipped when the
46recipe is baked by \code{\link[=bake.recipe]{bake.recipe()}}? While all operations are baked
47when \code{\link[=prep.recipe]{prep.recipe()}} is run, some operations may not be able to be
48conducted on new data (e.g. processing the outcome variable(s)).
49Care should be taken when using \code{skip = TRUE} as it may affect
50the computations for subsequent operations.}
51
52\item{id}{A character string that is unique to this step to identify it.}
53}
54\value{
55An updated version of \code{recipe} with the new step added to the
56sequence of any existing operations.
57}
58\description{
59\code{step_YeoJohnson} creates a \emph{specification} of a
60recipe step that will transform data using a simple Yeo-Johnson
61transformation.
62}
63\details{
64The Yeo-Johnson transformation is very similar to the
65Box-Cox but does not require the input variables to be strictly
66positive. In the package, the partial log-likelihood function is
67directly optimized within a reasonable set of transformation
68values (which can be changed by the user).
69
70This transformation is typically done on the outcome variable
71using the residuals for a statistical model (such as ordinary
72least squares). Here, a simple null model (intercept only) is
73used to apply the transformation to the \emph{predictor}
74variables individually. This can have the effect of making the
75variable distributions more symmetric.
76
If the transformation parameters are estimated to be very
close to the bounds, or if the optimization fails, a value of
\code{NA} is used and no transformation is applied.
80
81When you \code{\link[=tidy]{tidy()}} this step, a tibble with columns \code{terms} (the
82selectors or variables selected) and \code{value} (the
83lambda estimate) is returned.
84}
85\examples{
86
87library(modeldata)
88data(biomass)
89
90biomass_tr <- biomass[biomass$dataset == "Training",]
91biomass_te <- biomass[biomass$dataset == "Testing",]
92
93rec <- recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
94              data = biomass_tr)
95
96yj_transform <- step_YeoJohnson(rec,  all_numeric())
97
98yj_estimates <- prep(yj_transform, training = biomass_tr)
99
100yj_te <- bake(yj_estimates, biomass_te)
101
102plot(density(biomass_te$sulfur), main = "before")
103plot(density(yj_te$sulfur), main = "after")
104
105tidy(yj_transform, number = 1)
106tidy(yj_estimates, number = 1)
107}
108\references{
Yeo, I. K., and Johnson, R. A. (2000). A new family of power
transformations to improve normality or symmetry. \emph{Biometrika}, 87(4), 954-959.
111}
112\seealso{
113Other individual transformation steps:
114\code{\link{step_BoxCox}()},
115\code{\link{step_bs}()},
116\code{\link{step_harmonic}()},
117\code{\link{step_hyperbolic}()},
118\code{\link{step_inverse}()},
119\code{\link{step_invlogit}()},
120\code{\link{step_logit}()},
121\code{\link{step_log}()},
122\code{\link{step_mutate}()},
123\code{\link{step_ns}()},
124\code{\link{step_poly}()},
125\code{\link{step_relu}()},
126\code{\link{step_sqrt}()}
127}
128\concept{individual transformation steps}
129