1#' Replace matched patterns in a string.
2#'
3#' Vectorised over `string`, `pattern` and `replacement`.
4#'
5#' @inheritParams str_detect
6#' @param pattern Pattern to look for.
7#'
8#'   The default interpretation is a regular expression, as described
9#'   in [stringi::stringi-search-regex]. Control options with
10#'   [regex()].
11#'
12#'   Match a fixed string (i.e. by comparing only bytes), using
13#'   [fixed()]. This is fast, but approximate. Generally,
14#'   for matching human text, you'll want [coll()] which
15#'   respects character matching rules for the specified locale.
16#' @param replacement A character vector of replacements. Should be either
17#'   length one, or the same length as `string` or `pattern`.
18#'   References of the form `\1`, `\2`, etc will be replaced with
19#'   the contents of the respective matched group (created by `()`).
20#'
21#'   To perform multiple replacements in each element of `string`,
22#'   pass a named vector (`c(pattern1 = replacement1)`) to
23#'   `str_replace_all`. Alternatively, pass a function to
24#'   `replacement`: it will be called once for each match and its
25#'   return value will be used to replace the match.
26#'
27#'   To replace the complete string with `NA`, use
28#'   `replacement = NA_character_`.
29#' @return A character vector.
30#' @seealso [str_replace_na()] to turn missing values into "NA";
31#'   [stri_replace()] for the underlying implementation.
32#' @export
33#' @examples
34#' fruits <- c("one apple", "two pears", "three bananas")
35#' str_replace(fruits, "[aeiou]", "-")
36#' str_replace_all(fruits, "[aeiou]", "-")
37#' str_replace_all(fruits, "[aeiou]", toupper)
38#' str_replace_all(fruits, "b", NA_character_)
39#'
40#' str_replace(fruits, "([aeiou])", "")
41#' str_replace(fruits, "([aeiou])", "\\1\\1")
42#' str_replace(fruits, "[aeiou]", c("1", "2", "3"))
43#' str_replace(fruits, c("a", "e", "i"), "-")
44#'
45#' # If you want to apply multiple patterns and replacements to the same
46#' # string, pass a named vector to pattern.
47#' fruits %>%
48#'   str_c(collapse = "---") %>%
49#'   str_replace_all(c("one" = "1", "two" = "2", "three" = "3"))
50#'
51#' # Use a function for more sophisticated replacement. This example
52#' # replaces colour names with their hex values.
53#' colours <- str_c("\\b", colors(), "\\b", collapse="|")
54#' col2hex <- function(col) {
55#'   rgb <- col2rgb(col)
56#'   rgb(rgb["red", ], rgb["green", ], rgb["blue", ], max = 255)
57#' }
58#'
59#' x <- c(
60#'   "Roses are red, violets are blue",
61#'   "My favourite colour is green"
62#' )
63#' str_replace_all(x, colours, col2hex)
64str_replace <- function(string, pattern, replacement) {
65  if (!missing(replacement) && is.function(replacement)) {
66    return(str_transform(string, pattern, replacement))
67  }
68
69  switch(type(pattern),
70    empty = stop("Empty `pattern` not supported", call. = FALSE),
71    bound = stop("Boundary `pattern` not supported", call. = FALSE),
72    fixed = stri_replace_first_fixed(string, pattern, replacement,
73      opts_fixed = opts(pattern)),
74    coll  = stri_replace_first_coll(string, pattern, replacement,
75      opts_collator = opts(pattern)),
76    regex = stri_replace_first_regex(string, pattern, fix_replacement(replacement),
77      opts_regex = opts(pattern))
78  )
79}
80
81#' @export
82#' @rdname str_replace
83str_replace_all <- function(string, pattern, replacement) {
84  if (!missing(replacement) && is.function(replacement)) {
85    return(str_transform_all(string, pattern, replacement))
86  }
87
88
89  if (!is.null(names(pattern))) {
90    vec <- FALSE
91    replacement <- unname(pattern)
92    pattern[] <- names(pattern)
93  } else {
94    vec <- TRUE
95  }
96
97  switch(type(pattern),
98    empty = stop("Empty `pattern`` not supported", call. = FALSE),
99    bound = stop("Boundary `pattern` not supported", call. = FALSE),
100    fixed = stri_replace_all_fixed(string, pattern, replacement,
101      vectorize_all = vec, opts_fixed = opts(pattern)),
102    coll  = stri_replace_all_coll(string, pattern, replacement,
103      vectorize_all = vec, opts_collator = opts(pattern)),
104    regex = stri_replace_all_regex(string, pattern, fix_replacement(replacement),
105      vectorize_all = vec, opts_regex = opts(pattern))
106  )
107}
108
109fix_replacement <- function(x) {
110  if (!is.character(x)) {
111    stop("`replacement` must be a character vector", call. = FALSE)
112  }
113
114  vapply(x, fix_replacement_one, character(1), USE.NAMES = FALSE)
115}
116
117fix_replacement_one <- function(x) {
118  if (is.na(x)) {
119    return(x)
120  }
121
122  chars <- str_split(x, "")[[1]]
123  out <- character(length(chars))
124  escaped <- logical(length(chars))
125
126  in_escape <- FALSE
127  for (i in seq_along(chars)) {
128    escaped[[i]] <- in_escape
129    char <- chars[[i]]
130
131    if (in_escape) {
132      # Escape character not printed previously so must include here
133      if (char == "$") {
134        out[[i]] <- "\\\\$"
135      } else if (char >= "0" && char <= "9") {
136        out[[i]] <- paste0("$", char)
137      } else {
138        out[[i]] <- paste0("\\", char)
139      }
140
141      in_escape <- FALSE
142    } else {
143      if (char == "$") {
144        out[[i]] <- "\\$"
145      } else if (char == "\\") {
146        in_escape <- TRUE
147      } else {
148        out[[i]] <- char
149      }
150    }
151  }
152
153  # tibble::tibble(chars, out, escaped)
154  paste0(out, collapse = "")
155}
156
157
158#' Turn NA into "NA"
159#'
160#' @inheritParams str_replace
161#' @param replacement A single string.
162#' @export
163#' @examples
164#' str_replace_na(c(NA, "abc", "def"))
165str_replace_na <- function(string, replacement = "NA") {
166  stri_replace_na(string, replacement)
167}
168
169
170str_transform <- function(string, pattern, replacement) {
171  loc <- str_locate(string, pattern)
172  str_sub(string, loc, omit_na = TRUE) <- replacement(str_sub(string, loc))
173  string
174}
175str_transform_all <- function(string, pattern, replacement) {
176  locs <- str_locate_all(string, pattern)
177
178  for (i in seq_along(string)) {
179    for (j in rev(seq_len(nrow(locs[[i]])))) {
180      loc <- locs[[i]]
181      str_sub(string[[i]], loc[j, 1], loc[j, 2]) <- replacement(str_sub(string[[i]], loc[j, 1], loc[j, 2]))
182    }
183  }
184
185  string
186}
187