1% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/impute_mode.R
3\name{step_impute_mode}
4\alias{step_impute_mode}
5\alias{step_modeimpute}
6\title{Impute nominal data using the most common value}
7\usage{
8step_impute_mode(
9  recipe,
10  ...,
11  role = NA,
12  trained = FALSE,
13  modes = NULL,
14  ptype = NULL,
15  skip = FALSE,
16  id = rand_id("impute_mode")
17)
18
19step_modeimpute(
20  recipe,
21  ...,
22  role = NA,
23  trained = FALSE,
24  modes = NULL,
25  ptype = NULL,
26  skip = FALSE,
27  id = rand_id("impute_mode")
28)
29}
30\arguments{
31\item{recipe}{A recipe object. The step will be added to the
32sequence of operations for this recipe.}
33
34\item{...}{One or more selector functions to choose variables
35for this step. See \code{\link[=selections]{selections()}} for more details.}
36
37\item{role}{Not used by this step since no new variables are
38created.}
39
40\item{trained}{A logical to indicate if the quantities for
41preprocessing have been estimated.}
42
43\item{modes}{A named character vector of modes. This is
44\code{NULL} until computed by \code{\link[=prep.recipe]{prep.recipe()}}.}
45
46\item{ptype}{A data frame prototype to cast new data sets to. This is
47commonly a 0-row slice of the training set.}
48
49\item{skip}{A logical. Should the step be skipped when the
50recipe is baked by \code{\link[=bake.recipe]{bake.recipe()}}? While all operations are baked
51when \code{\link[=prep.recipe]{prep.recipe()}} is run, some operations may not be able to be
52conducted on new data (e.g. processing the outcome variable(s)).
53Care should be taken when using \code{skip = TRUE} as it may affect
54the computations for subsequent operations.}
55
56\item{id}{A character string that is unique to this step to identify it.}
57}
58\value{
59An updated version of \code{recipe} with the new step added to the
60sequence of any existing operations.
61}
62\description{
63\code{step_impute_mode} creates a \emph{specification} of a
64recipe step that will replace missing values of nominal
65variables with the training set mode of those variables.
66}
67\details{
68\code{step_impute_mode} estimates the variable modes
69from the data used in the \code{training} argument of
70\code{prep.recipe}. \code{bake.recipe} then uses these modes to
71replace missing values in new data sets. If the training set
72data has more than one mode, one is selected at random.
73
74When you \code{\link[=tidy]{tidy()}} this step, a tibble with columns \code{terms} (the
75selectors or variables selected) and \code{model} (the mode
76value) is returned.
77
78As of \code{recipes} 0.1.16, this function name changed from \code{step_modeimpute()}
79to \code{step_impute_mode()}.
80}
81\examples{
82library(modeldata)
83data("credit_data")
84
85## missing data per column
86vapply(credit_data, function(x) mean(is.na(x)), c(num = 0))
87
88set.seed(342)
89in_training <- sample(1:nrow(credit_data), 2000)
90
91credit_tr <- credit_data[ in_training, ]
92credit_te <- credit_data[-in_training, ]
93missing_examples <- c(14, 394, 565)
94
95rec <- recipe(Price ~ ., data = credit_tr)
96
97impute_rec <- rec \%>\%
98  step_impute_mode(Status, Home, Marital)
99
100imp_models <- prep(impute_rec, training = credit_tr)
101
102imputed_te <- bake(imp_models, new_data = credit_te, everything())
103
104table(credit_te$Home, imputed_te$Home, useNA = "always")
105
106tidy(impute_rec, number = 1)
107tidy(imp_models, number = 1)
108}
109\seealso{
110Other imputation steps:
111\code{\link{step_impute_bag}()},
112\code{\link{step_impute_knn}()},
113\code{\link{step_impute_linear}()},
114\code{\link{step_impute_lower}()},
115\code{\link{step_impute_mean}()},
116\code{\link{step_impute_median}()},
117\code{\link{step_impute_roll}()}
118}
119\concept{imputation steps}
120