#' Split a string.
#'
#' Return a list of character vectors of the words in the string, using
#' \code{sep} as the delimiter string.
#'
#' If \code{maxsplit} is given, at most \code{maxsplit} splits are done
#' (thus, the character vector will have at most \code{maxsplit + 1} elements).
#' If \code{maxsplit} is not specified, or is negative, then there is no limit
#' on the number of splits (all possible splits are made). Consecutive
#' delimiters are not grouped together and are deemed to delimit empty strings.
#' The \code{sep} argument may consist of multiple characters. If \code{sep} is
#' not specified, a single space is used as the separator. If \code{sep} is the
#' empty string, the string is split into individual characters. Splitting an
#' empty string returns an empty string.
#'
#' \code{str}, \code{sep} and \code{maxsplit} are processed element-wise, so
#' the result has one character vector per element of \code{str}.
#'
#' @param str A character vector.
#' @param sep A character string.
#' @param maxsplit A numeric integer. Negative values mean "no limit".
#'
#' @return A list of character vectors.
#'
#' @references \url{https://docs.python.org/3/library/stdtypes.html#str.split}
#'
#' @seealso \code{\link{pystr_join}}, \code{\link{pystr_rsplit}}
#'
#' @examples
#' pystr_split("www.example.com", ".")
#' pystr_split("123123123", "2", 2)
#' pystr_split("1,,2,3", ",")
#' pystr_split("a--b--c", "--")
#'
#' @export
pystr_split <- function(str, sep=" ", maxsplit=nchar(str)-1) {
  # mapply() cannot recycle a length-one `sep` against zero-length input, so
  # answer that case directly (the lazy default for `maxsplit` is never forced).
  if (length(str) == 0L) {
    return(list())
  }

  mapply(pystr_split_, str, sep, maxsplit, SIMPLIFY = FALSE, USE.NAMES = FALSE)
}

# Split a single string `str` on a single separator `sep`, performing at most
# `maxsplit` splits. Returns a character vector.
pystr_split_ <- function(str, sep, maxsplit) {
  # A negative limit means "no limit", mirroring Python's str.split(). Without
  # this, `seq_len()`/`1:maxsplit` below would be handed a negative count.
  if (maxsplit < 0) {
    maxsplit <- Inf
  }

  if (maxsplit == 0) {
    return(str)
  }

  if (sep == "") {
    # An empty separator splits the string into individual characters.
    chars <- strsplit(str, sep)[[1]]

    if (maxsplit >= nchar(str) - 1) {
      return(chars)
    }

    # Keep the first `maxsplit` characters separate and glue the rest back
    # together as the final element.
    head_chars <- chars[seq_len(maxsplit)]
    tail_joined <- pystr_join(chars[(maxsplit + 1):length(chars)], "")
    return(c(head_chars, tail_joined))
  }

  # Never attempt more splits than there are separator occurrences.
  num_splits <- min(pystr_count(str, sep), maxsplit)
  elements <- character(num_splits)
  remaining <- str
  cum_splits <- 0

  while (cum_splits < num_splits) {
    if (remaining == "") {
      # Nothing left to partition: return only what has been collected so far.
      return(elements[seq_len(cum_splits)])
    }

    # Partition yields (before, separator, after); keep `before` and continue
    # with `after`.
    parts <- pystr_partition(remaining, sep)[[1]]
    cum_splits <- cum_splits + 1
    elements[cum_splits] <- parts[1]
    remaining <- parts[3]
  }

  c(elements, remaining)
}