1\name{split}
2\alias{split}
3\alias{split.data.table}
4\title{ Split data.table into chunks in a list }
5\description{
6  Split method for data.table. Faster and more flexible. Be aware that processing a list of data.tables will generally be much slower than manipulating a single data.table by group using the \code{by} argument; read more in \code{\link{data.table}}.
7}
8\usage{
9\method{split}{data.table}(x, f, drop = FALSE,
10      by, sorted = FALSE, keep.by = TRUE, flatten = TRUE,
11      \dots, verbose = getOption("datatable.verbose"))
12}
13\arguments{
14  \item{x}{data.table }
15  \item{f}{factor or list of factors. Same as in \code{\link[base:split]{split.data.frame}}. Prefer the \code{by} argument instead; \code{f} is provided only for consistency with the data.frame method.}
16  \item{drop}{logical. The default \code{FALSE} does not drop empty list elements that result from factor levels which do not occur in the data. Also works with the arguments specific to the data.table method, such as \code{by}.}
17  \item{by}{character vector. Names of the columns on which the split should be made. When \code{length(by) > 1L} and \code{flatten} is \code{FALSE}, the result is a nested list with data.tables as leaves.}
18  \item{sorted}{logical. With the default \code{FALSE}, the order of the groups being split on is retained. When \code{TRUE}, sorted list(s) are returned. Has no effect when the \code{f} argument is used.}
19  \item{keep.by}{logical, default \code{TRUE}. Whether to keep the column(s) provided to the \code{by} argument.}
20  \item{flatten}{logical, default \code{TRUE}, which unlists nested lists of data.tables. When using \code{f}, results are always flattened to a list of data.tables.}
21  \item{\dots}{passed on to the data.frame-style processing that is used when the \code{f} argument is supplied.}
22  \item{verbose}{logical, default \code{getOption("datatable.verbose")}. When \code{TRUE}, the data.table query used to split the data is printed to the console.}
23}
24\details{
25    Argument \code{f} exists only for consistency in usage with the data.frame method. It is recommended to use the \code{by} argument instead: it is faster, more flexible, and by default preserves the order of groups as they appear in the data.
26}
27\value{
28    A list of \code{data.table}s. If \code{flatten} is \code{FALSE} and \code{length(by) > 1L}, the result is a recursively nested list with \code{data.table}s as leaves, grouped according to the \code{by} argument.
29}
30\seealso{ \code{\link{data.table}}, \code{\link{rbindlist}} }
31\examples{
32set.seed(123)
33DT = data.table(x1 = rep(letters[1:2], 6),
34                x2 = rep(letters[3:5], 4),
35                x3 = rep(letters[5:8], 3),
36                y = rnorm(12))
37DT = DT[sample(.N)]
38DF = as.data.frame(DT)
39
40# split consistency with data.frame: `x, f, drop`
41all.equal(
42    split(DT, list(DT$x1, DT$x2)),
43    lapply(split(DF, list(DF$x1, DF$x2)), setDT)
44)
45
46# nested list using the `flatten` argument
47split(DT, by=c("x1", "x2"))
48split(DT, by=c("x1", "x2"), flatten=FALSE)
49
50# dealing with factors
51fdt = DT[, c(lapply(.SD, as.factor), list(y=y)), .SDcols=x1:x3]
52fdf = as.data.frame(fdt)
53sdf = split(fdf, list(fdf$x1, fdf$x2))
54all.equal(
55    split(fdt, by=c("x1", "x2"), sorted=TRUE),
56    lapply(sdf[sort(names(sdf))], setDT)
57)
58
59# factors having unused levels, drop FALSE, TRUE
60fdt = DT[, .(x1 = as.factor(c(as.character(x1), "c"))[-13L],
61             x2 = as.factor(c("a", as.character(x2)))[-1L],
62             x3 = as.factor(c("a", as.character(x3), "z"))[c(-1L,-14L)],
63             y = y)]
64fdf = as.data.frame(fdt)
65sdf = split(fdf, list(fdf$x1, fdf$x2))
66all.equal(
67    split(fdt, by=c("x1", "x2"), sorted=TRUE),
68    lapply(sdf[sort(names(sdf))], setDT)
69)
70sdf = split(fdf, list(fdf$x1, fdf$x2), drop=TRUE)
71all.equal(
72    split(fdt, by=c("x1", "x2"), sorted=TRUE, drop=TRUE),
73    lapply(sdf[sort(names(sdf))], setDT)
74)
75}
76\keyword{ data }
77