1#' Replace matched patterns in a string. 2#' 3#' Vectorised over `string`, `pattern` and `replacement`. 4#' 5#' @inheritParams str_detect 6#' @param pattern Pattern to look for. 7#' 8#' The default interpretation is a regular expression, as described 9#' in [stringi::stringi-search-regex]. Control options with 10#' [regex()]. 11#' 12#' Match a fixed string (i.e. by comparing only bytes), using 13#' [fixed()]. This is fast, but approximate. Generally, 14#' for matching human text, you'll want [coll()] which 15#' respects character matching rules for the specified locale. 16#' @param replacement A character vector of replacements. Should be either 17#' length one, or the same length as `string` or `pattern`. 18#' References of the form `\1`, `\2`, etc will be replaced with 19#' the contents of the respective matched group (created by `()`). 20#' 21#' To perform multiple replacements in each element of `string`, 22#' pass a named vector (`c(pattern1 = replacement1)`) to 23#' `str_replace_all`. Alternatively, pass a function to 24#' `replacement`: it will be called once for each match and its 25#' return value will be used to replace the match. 26#' 27#' To replace the complete string with `NA`, use 28#' `replacement = NA_character_`. 29#' @return A character vector. 30#' @seealso [str_replace_na()] to turn missing values into "NA"; 31#' [stri_replace()] for the underlying implementation. 32#' @export 33#' @examples 34#' fruits <- c("one apple", "two pears", "three bananas") 35#' str_replace(fruits, "[aeiou]", "-") 36#' str_replace_all(fruits, "[aeiou]", "-") 37#' str_replace_all(fruits, "[aeiou]", toupper) 38#' str_replace_all(fruits, "b", NA_character_) 39#' 40#' str_replace(fruits, "([aeiou])", "") 41#' str_replace(fruits, "([aeiou])", "\\1\\1") 42#' str_replace(fruits, "[aeiou]", c("1", "2", "3")) 43#' str_replace(fruits, c("a", "e", "i"), "-") 44#' 45#' # If you want to apply multiple patterns and replacements to the same 46#' # string, pass a named vector to pattern. 47#' fruits %>% 48#' str_c(collapse = "---") %>% 49#' str_replace_all(c("one" = "1", "two" = "2", "three" = "3")) 50#' 51#' # Use a function for more sophisticated replacement. This example 52#' # replaces colour names with their hex values. 53#' colours <- str_c("\\b", colors(), "\\b", collapse="|") 54#' col2hex <- function(col) { 55#' rgb <- col2rgb(col) 56#' rgb(rgb["red", ], rgb["green", ], rgb["blue", ], max = 255) 57#' } 58#' 59#' x <- c( 60#' "Roses are red, violets are blue", 61#' "My favourite colour is green" 62#' ) 63#' str_replace_all(x, colours, col2hex) 64str_replace <- function(string, pattern, replacement) { 65 if (!missing(replacement) && is.function(replacement)) { 66 return(str_transform(string, pattern, replacement)) 67 } 68 69 switch(type(pattern), 70 empty = stop("Empty `pattern` not supported", call. = FALSE), 71 bound = stop("Boundary `pattern` not supported", call. = FALSE), 72 fixed = stri_replace_first_fixed(string, pattern, replacement, 73 opts_fixed = opts(pattern)), 74 coll = stri_replace_first_coll(string, pattern, replacement, 75 opts_collator = opts(pattern)), 76 regex = stri_replace_first_regex(string, pattern, fix_replacement(replacement), 77 opts_regex = opts(pattern)) 78 ) 79} 80 81#' @export 82#' @rdname str_replace 83str_replace_all <- function(string, pattern, replacement) { 84 if (!missing(replacement) && is.function(replacement)) { 85 return(str_transform_all(string, pattern, replacement)) 86 } 87 88 89 if (!is.null(names(pattern))) { 90 vec <- FALSE 91 replacement <- unname(pattern) 92 pattern[] <- names(pattern) 93 } else { 94 vec <- TRUE 95 } 96 97 switch(type(pattern), 98 empty = stop("Empty `pattern`` not supported", call. = FALSE), 99 bound = stop("Boundary `pattern` not supported", call. = FALSE), 100 fixed = stri_replace_all_fixed(string, pattern, replacement, 101 vectorize_all = vec, opts_fixed = opts(pattern)), 102 coll = stri_replace_all_coll(string, pattern, replacement, 103 vectorize_all = vec, opts_collator = opts(pattern)), 104 regex = stri_replace_all_regex(string, pattern, fix_replacement(replacement), 105 vectorize_all = vec, opts_regex = opts(pattern)) 106 ) 107} 108 109fix_replacement <- function(x) { 110 if (!is.character(x)) { 111 stop("`replacement` must be a character vector", call. = FALSE) 112 } 113 114 vapply(x, fix_replacement_one, character(1), USE.NAMES = FALSE) 115} 116 117fix_replacement_one <- function(x) { 118 if (is.na(x)) { 119 return(x) 120 } 121 122 chars <- str_split(x, "")[[1]] 123 out <- character(length(chars)) 124 escaped <- logical(length(chars)) 125 126 in_escape <- FALSE 127 for (i in seq_along(chars)) { 128 escaped[[i]] <- in_escape 129 char <- chars[[i]] 130 131 if (in_escape) { 132 # Escape character not printed previously so must include here 133 if (char == "$") { 134 out[[i]] <- "\\\\$" 135 } else if (char >= "0" && char <= "9") { 136 out[[i]] <- paste0("$", char) 137 } else { 138 out[[i]] <- paste0("\\", char) 139 } 140 141 in_escape <- FALSE 142 } else { 143 if (char == "$") { 144 out[[i]] <- "\\$" 145 } else if (char == "\\") { 146 in_escape <- TRUE 147 } else { 148 out[[i]] <- char 149 } 150 } 151 } 152 153 # tibble::tibble(chars, out, escaped) 154 paste0(out, collapse = "") 155} 156 157 158#' Turn NA into "NA" 159#' 160#' @inheritParams str_replace 161#' @param replacement A single string. 162#' @export 163#' @examples 164#' str_replace_na(c(NA, "abc", "def")) 165str_replace_na <- function(string, replacement = "NA") { 166 stri_replace_na(string, replacement) 167} 168 169 170str_transform <- function(string, pattern, replacement) { 171 loc <- str_locate(string, pattern) 172 str_sub(string, loc, omit_na = TRUE) <- replacement(str_sub(string, loc)) 173 string 174} 175str_transform_all <- function(string, pattern, replacement) { 176 locs <- str_locate_all(string, pattern) 177 178 for (i in seq_along(string)) { 179 for (j in rev(seq_len(nrow(locs[[i]])))) { 180 loc <- locs[[i]] 181 str_sub(string[[i]], loc[j, 1], loc[j, 2]) <- replacement(str_sub(string[[i]], loc[j, 1], loc[j, 2])) 182 } 183 } 184 185 string 186} 187