#' Split a string.
#'
#' Return a list of character vectors of the words in the string, using
#' \code{sep} as the delimiter string.
#'
#' If \code{maxsplit} is given, at most \code{maxsplit} splits are done
#' (thus, the character vector will have at most \code{maxsplit + 1} elements).
#' If \code{maxsplit} is not specified, then there is no limit on the number of
#' splits (all possible splits are made). Consecutive delimiters are not
#' grouped together and are deemed to delimit empty strings.
#' The \code{sep} argument may consist of multiple characters. If \code{sep} is
#' not specified, it defaults to a single space, which is handled like any
#' other explicit separator. Splitting an empty string returns an empty string.
#'
#' \code{str}, \code{sep} and \code{maxsplit} are recycled against each other,
#' so each string may be given its own separator and split limit. A
#' zero-length \code{str} returns an empty list.
#'
#' @param str A character vector.
#' @param sep A character vector of separators, recycled against \code{str}.
#' @param maxsplit A numeric vector of whole numbers giving the maximum number
#'   of splits for each string, recycled against \code{str}.
#'
#' @return A list of character vectors, one per (recycled) input string.
#'
#' @references \url{https://docs.python.org/3/library/stdtypes.html#str.split}
#'
#' @seealso \code{\link{pystr_join}}, \code{\link{pystr_rsplit}}
#'
#' @examples
#' pystr_split("www.example.com", ".")
#' pystr_split("123123123", "2", 2)
#' pystr_split("1,,2,3", ",")
#' pystr_split("a--b--c", "--")
#'
#' @export
pystr_split <- function(str, sep = " ", maxsplit = nchar(str) - 1) {
  # mapply() refuses to mix zero-length and non-zero-length arguments, so an
  # empty input is answered directly instead of raising an error. The check
  # runs before `maxsplit` is forced, so its default is never evaluated here.
  if (length(str) == 0L) {
    return(list())
  }

  mapply(pystr_split_, str, sep, maxsplit, SIMPLIFY = FALSE, USE.NAMES = FALSE)
}
34
# Split one string on one separator; scalar worker that pystr_split() maps
# over its arguments.
#
# str      A single string to split.
# sep      A single separator string; "" splits into individual characters.
# maxsplit Maximum number of splits. 0 returns `str` untouched; a negative
#          value means "no limit", mirroring Python's str.split.
#
# Returns a character vector of the pieces.
pystr_split_ <- function(str, sep, maxsplit) {
  if (maxsplit == 0) {
    return(str)
  }

  # A negative limit used to crash the sep == "" branch (1:maxsplit built a
  # mixed-sign index) and to silently skip splitting otherwise. Treat it as
  # "no limit" in both branches.
  if (maxsplit < 0) {
    maxsplit <- Inf
  }

  if (sep == "") {
    # An empty separator splits the string into its individual characters.
    chars <- strsplit(str, sep)[[1]]

    if (maxsplit >= nchar(str) - 1) {
      return(chars)
    }

    # Here 0 < maxsplit < nchar(str) - 1, so both index ranges are ascending
    # and in bounds: keep the first `maxsplit` characters and glue the rest
    # back together as the final piece.
    leading <- chars[1:maxsplit]
    trailing <- pystr_join(chars[(maxsplit + 1):length(chars)], "")
    return(c(leading, trailing))
  }

  # An empty string has nothing to split; answer directly so the result does
  # not depend on how pystr_count() treats empty input.
  if (isTRUE(str == "")) {
    return(str)
  }

  # Never attempt more splits than there are separators in the string.
  num_splits <- min(pystr_count(str, sep), maxsplit)
  done <- 0
  pieces <- c()
  rest <- str

  while (done < num_splits) {
    # Defensive stop if the input is exhausted before the expected number of
    # splits has been made.
    if (rest == "") {
      return(pieces)
    }

    # NOTE(review): assumes pystr_partition() yields (before, sep, after) for
    # the first occurrence of `sep` -- confirm against its definition.
    parts <- pystr_partition(rest, sep)[[1]]
    pieces <- c(pieces, parts[1])
    rest <- parts[3]
    done <- done + 1
  }

  # Whatever is left after the last split is the final piece.
  c(pieces, rest)
}
67