% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/impute_mode.R
\name{step_impute_mode}
\alias{step_impute_mode}
\alias{step_modeimpute}
\title{Impute nominal data using the most common value}
\usage{
step_impute_mode(
  recipe,
  ...,
  role = NA,
  trained = FALSE,
  modes = NULL,
  ptype = NULL,
  skip = FALSE,
  id = rand_id("impute_mode")
)

step_modeimpute(
  recipe,
  ...,
  role = NA,
  trained = FALSE,
  modes = NULL,
  ptype = NULL,
  skip = FALSE,
  id = rand_id("impute_mode")
)
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the
sequence of operations for this recipe.}

\item{...}{One or more selector functions to choose variables
for this step. See \code{\link[=selections]{selections()}} for more details.}

\item{role}{Not used by this step since no new variables are
created.}

\item{trained}{A logical to indicate if the quantities for
preprocessing have been estimated.}

\item{modes}{A named character vector of modes. This is
\code{NULL} until computed by \code{\link[=prep.recipe]{prep.recipe()}}.}

\item{ptype}{A data frame prototype to cast new data sets to. This is
commonly a 0-row slice of the training set.}

\item{skip}{A logical. Should the step be skipped when the
recipe is baked by \code{\link[=bake.recipe]{bake.recipe()}}? While all operations are baked
when \code{\link[=prep.recipe]{prep.recipe()}} is run, some operations may not be able to be
conducted on new data (e.g. processing the outcome variable(s)).
Care should be taken when using \code{skip = TRUE} as it may affect
the computations for subsequent operations.}

\item{id}{A character string that is unique to this step to identify it.}
}
\value{
An updated version of \code{recipe} with the new step added to the
sequence of any existing operations.
}
\description{
\code{step_impute_mode} creates a \emph{specification} of a
recipe step that will substitute missing values of nominal
variables by the training set mode of those variables.
}
\details{
\code{step_impute_mode} estimates the variable modes
from the data used in the \code{training} argument of
\code{prep.recipe}. \code{bake.recipe} then imputes missing
values in new data sets using these modes. If the training set
data has more than one mode, one is selected at random.

When you \code{\link[=tidy]{tidy()}} this step, a tibble with columns \code{terms} (the
selectors or variables selected) and \code{model} (the mode
value) is returned.

As of \code{recipes} 0.1.16, this function name changed from \code{step_modeimpute()}
to \code{step_impute_mode()}.
}
\examples{
library(modeldata)
data("credit_data")

## missing data per column
vapply(credit_data, function(x) mean(is.na(x)), c(num = 0))

set.seed(342)
in_training <- sample(1:nrow(credit_data), 2000)

credit_tr <- credit_data[ in_training, ]
credit_te <- credit_data[-in_training, ]
missing_examples <- c(14, 394, 565)

rec <- recipe(Price ~ ., data = credit_tr)

impute_rec <- rec \%>\%
  step_impute_mode(Status, Home, Marital)

imp_models <- prep(impute_rec, training = credit_tr)

imputed_te <- bake(imp_models, new_data = credit_te, everything())

table(credit_te$Home, imputed_te$Home, useNA = "always")

tidy(impute_rec, number = 1)
tidy(imp_models, number = 1)
}
\seealso{
Other imputation steps:
\code{\link{step_impute_bag}()},
\code{\link{step_impute_knn}()},
\code{\link{step_impute_linear}()},
\code{\link{step_impute_lower}()},
\code{\link{step_impute_mean}()},
\code{\link{step_impute_median}()},
\code{\link{step_impute_roll}()}
}
\concept{imputation steps}