# kate: default-dictionary en_US

## This file is part of the 'stringi' package for R.
## Copyright (c) 2013-2021, Marek Gagolewski <https://www.gagolewski.com>
## All rights reserved.
##
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions are met:
##
## 1. Redistributions of source code must retain the above copyright notice,
## this list of conditions and the following disclaimer.
##
## 2. Redistributions in binary form must reproduce the above copyright notice,
## this list of conditions and the following disclaimer in the documentation
## and/or other materials provided with the distribution.
##
## 3. Neither the name of the copyright holder nor the names of its
## contributors may be used to endorse or promote products derived from
## this software without specific prior written permission.
##
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
## 'AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
## BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
## FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
## OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#' @title
#' Replace Pattern Occurrences
#'
#' @description
#' These functions replace, with the given replacement string, every/first/last
#' substring of the input that matches the specified \code{pattern}.
#'
#' @details
#' By default, all the functions are vectorized over
#' \code{str}, \code{pattern}, \code{replacement} (with recycling
#' of the elements in the shorter vector if necessary).
#' Input that is not part of any match is left unchanged;
#' each match is replaced in the result by the replacement string.
#'
#' However, for \code{stri_replace_all*}, if \code{vectorize_all} is \code{FALSE},
#' then each substring matching any of the supplied \code{pattern}s
#' is replaced by a corresponding \code{replacement} string.
#' In such a case, the vectorization is over \code{str},
#' and - independently - over \code{pattern} and \code{replacement}.
#' In other words, this is equivalent to something like
#' \code{for (i in 1:npatterns) str <- stri_replace_all(str, pattern[i], replacement[i])}.
#' Note that you must set \code{length(pattern) >= length(replacement)}.
#'
#' In case of \code{stri_replace_*_regex},
#' the replacement string may contain references to capture groups
#' (in round parentheses).
#' References are of the form \code{$n}, where \code{n} is the number
#' of the capture group (\code{$1} denotes the first group).
#' For the literal \code{$},
#' escape it with a backslash.
#' Moreover, \code{${name}} are used for named capture groups.
#'
#' Note that \code{stri_replace_last_regex} searches from start to end,
#' but skips overlapping matches, see the example below.
#'
#' \code{stri_replace}, \code{stri_replace_all}, \code{stri_replace_first},
#' and \code{stri_replace_last} are convenience functions; they just call
#' \code{stri_replace_*_*} variants, depending on the arguments used.
#'
#' If you wish to remove white-spaces from the start or end
#' of a string, see \code{\link{stri_trim}}.
#'
#' @param str character vector; strings to search in
#' @param pattern,regex,fixed,coll,charclass character vector;
#' search patterns; for more details refer to \link{stringi-search}
#' @param replacement character vector with replacements for matched patterns
#' @param opts_collator,opts_fixed,opts_regex a named list used to tune up
#' the search engine's settings; see
#' \code{\link{stri_opts_collator}}, \code{\link{stri_opts_fixed}},
#' and \code{\link{stri_opts_regex}}, respectively; \code{NULL}
#' for the defaults
#' @param merge single logical value;
#' should consecutive matches be merged into one string;
#' \code{stri_replace_all_charclass} only
#' @param vectorize_all single logical value;
#' should each occurrence of a pattern in every string
#' be replaced by a corresponding replacement string?;
#' \code{stri_replace_all_*} only
#' @param vectorise_all alias of \code{vectorize_all}
#' @param mode single string;
#' one of: \code{'first'} (the default), \code{'all'}, \code{'last'}
#' @param ... supplementary arguments passed to the underlying functions,
#' including additional settings for \code{opts_collator}, \code{opts_regex},
#' \code{opts_fixed}, and so on
#'
#' @return All the functions return a character vector.
#'
#' @examples
#' stri_replace_all_charclass('aaaa', '[a]', 'b', merge=c(TRUE, FALSE))
#'
#' stri_replace_all_charclass('a\nb\tc d', '\\p{WHITE_SPACE}', ' ')
#' stri_replace_all_charclass('a\nb\tc d', '\\p{WHITE_SPACE}', ' ', merge=TRUE)
#'
#' s <- 'Lorem ipsum dolor sit amet, consectetur adipisicing elit.'
#' stri_replace_all_fixed(s, ' ', '#')
#' stri_replace_all_fixed(s, 'o', '0')
#'
#' stri_replace_all_fixed(c('1', 'NULL', '3'), 'NULL', NA)
#'
#' stri_replace_all_regex(s, ' .*? ', '#')
#' stri_replace_all_regex(s, '(el|s)it', '1234')
#' stri_replace_all_regex('abaca', 'a', c('!', '*'))
#' stri_replace_all_regex('123|456|789', '(\\p{N}).(\\p{N})', '$2-$1')
#' stri_replace_all_regex(c('stringi R', 'REXAMINE', '123'), '( R|R.)', ' r ')
#'
#' # named capture groups are available since ICU 55
#' \dontrun{
#' stri_replace_all_regex('words 123 and numbers 456',
#'    '(?<numbers>[0-9]+)', '!${numbers}!')
#' }
#'
#' # Compare the results:
#' stri_replace_all_fixed('The quick brown fox jumped over the lazy dog.',
#'      c('quick', 'brown', 'fox'), c('slow',  'black', 'bear'), vectorize_all=TRUE)
#' stri_replace_all_fixed('The quick brown fox jumped over the lazy dog.',
#'      c('quick', 'brown', 'fox'), c('slow',  'black', 'bear'), vectorize_all=FALSE)
#'
#' # Compare the results:
#' stri_replace_all_fixed('The quicker brown fox jumped over the lazy dog.',
#'      c('quick', 'brown', 'fox'), c('slow',  'black', 'bear'), vectorize_all=FALSE)
#' stri_replace_all_regex('The quicker brown fox jumped over the lazy dog.',
#'      '\\b'%s+%c('quick', 'brown', 'fox')%s+%'\\b', c('slow',  'black', 'bear'), vectorize_all=FALSE)
#'
#' # Searching for the last occurrence:
#' # Note the difference - regex searches left to right, with no overlaps.
#' stri_replace_last_fixed("agAGA", "aga", "*", case_insensitive=TRUE)
#' stri_replace_last_regex("agAGA", "aga", "*", case_insensitive=TRUE)
#'
#' @family search_replace
#' @export
#' @rdname stri_replace
stri_replace_all <- function(str, replacement, ..., regex, fixed, coll, charclass)
{
    # record which of the mutually exclusive pattern arguments was supplied
    providedarg <- c(
        regex = !missing(regex),
        fixed = !missing(fixed),
        coll = !missing(coll),
        charclass = !missing(charclass))

    # exactly one search engine must be selected
    if (sum(providedarg) != 1)
        stop("you have to specify either `regex`, `fixed`, `coll`, or `charclass`")

    if (providedarg["regex"])
        stri_replace_all_regex(str, regex, replacement, ...)
    else if (providedarg["fixed"])
        stri_replace_all_fixed(str, fixed, replacement, ...)
    else if (providedarg["coll"])
        stri_replace_all_coll(str, coll, replacement, ...)
    else if (providedarg["charclass"])
        stri_replace_all_charclass(str, charclass, replacement, ...)
}


#' @export
#' @rdname stri_replace
stri_replace_first <- function(str, replacement, ..., regex, fixed, coll, charclass)
{
    # record which of the mutually exclusive pattern arguments was supplied
    providedarg <- c(
        regex = !missing(regex),
        fixed = !missing(fixed),
        coll = !missing(coll),
        charclass = !missing(charclass))

    # exactly one search engine must be selected
    if (sum(providedarg) != 1)
        stop("you have to specify either `regex`, `fixed`, `coll`, or `charclass`")

    if (providedarg["regex"])
        stri_replace_first_regex(str, regex, replacement, ...)
    else if (providedarg["fixed"])
        stri_replace_first_fixed(str, fixed, replacement, ...)
    else if (providedarg["coll"])
        stri_replace_first_coll(str, coll, replacement, ...)
    else if (providedarg["charclass"])
        stri_replace_first_charclass(str, charclass, replacement, ...)
}


#' @export
#' @rdname stri_replace
stri_replace_last <- function(str, replacement, ..., regex, fixed, coll, charclass)
{
    # record which of the mutually exclusive pattern arguments was supplied
    providedarg <- c(
        regex = !missing(regex),
        fixed = !missing(fixed),
        coll = !missing(coll),
        charclass = !missing(charclass))

    # exactly one search engine must be selected
    if (sum(providedarg) != 1)
        stop("you have to specify either `regex`, `fixed`, `coll`, or `charclass`")

    if (providedarg["regex"])
        stri_replace_last_regex(str, regex, replacement, ...)
    else if (providedarg["fixed"])
        stri_replace_last_fixed(str, fixed, replacement, ...)
    else if (providedarg["coll"])
        stri_replace_last_coll(str, coll, replacement, ...)
    else if (providedarg["charclass"])
        stri_replace_last_charclass(str, charclass, replacement, ...)
}


#' @export
#' @rdname stri_replace
stri_replace <- function(str, replacement, ..., regex, fixed, coll, charclass,
    mode = c("first", "all", "last"))
{
    # `first` is default for compatibility with stringr
    mode <- match.arg(mode)  # this is slow

    # Pattern arguments the caller omitted are forwarded as-is; missing()
    # in the callee still reports them as missing, so the "exactly one
    # pattern type" check is performed there.
    switch(mode,
        first = stri_replace_first(str, replacement, ..., regex = regex,
            fixed = fixed, coll = coll, charclass = charclass),
        last = stri_replace_last(str, replacement, ..., regex = regex,
            fixed = fixed, coll = coll, charclass = charclass),
        all = stri_replace_all(str, replacement, ..., regex = regex,
            fixed = fixed, coll = coll, charclass = charclass))
}


#' @export
#' @rdname stri_replace
stri_replace_all_charclass <- function(str, pattern, replacement, merge = FALSE,
    vectorize_all = TRUE, vectorise_all = vectorize_all)
{
    # an explicitly supplied British-spelling alias takes precedence
    if (!missing(vectorise_all))
        vectorize_all <- vectorise_all

    .Call(C_stri_replace_all_charclass, str, pattern, replacement, merge, vectorize_all)
}


#' @export
#' @rdname stri_replace
stri_replace_first_charclass <- function(str, pattern, replacement)
{
    .Call(C_stri_replace_first_charclass, str, pattern, replacement)
}


#' @export
#' @rdname stri_replace
stri_replace_last_charclass <- function(str, pattern, replacement)
{
    .Call(C_stri_replace_last_charclass, str, pattern, replacement)
}


#' @export
#' @rdname stri_replace
stri_replace_all_coll <- function(str, pattern, replacement,
    vectorize_all = TRUE, vectorise_all = vectorize_all, ..., opts_collator = NULL)
{
    # an explicitly supplied British-spelling alias takes precedence
    if (!missing(vectorise_all))
        vectorize_all <- vectorise_all
    # extra arguments in `...` are merged with `opts_collator`
    if (!missing(...))
        opts_collator <- do.call(stri_opts_collator, as.list(c(opts_collator, ...)))
    .Call(C_stri_replace_all_coll, str, pattern, replacement, vectorize_all, opts_collator)
}


#' @export
#' @rdname stri_replace
stri_replace_first_coll <- function(str, pattern, replacement, ..., opts_collator = NULL)
{
    # extra arguments in `...` are merged with `opts_collator`
    if (!missing(...))
        opts_collator <- do.call(stri_opts_collator, as.list(c(opts_collator, ...)))
    .Call(C_stri_replace_first_coll, str, pattern, replacement, opts_collator)
}


#' @export
#' @rdname stri_replace
stri_replace_last_coll <- function(str, pattern, replacement, ..., opts_collator = NULL)
{
    # extra arguments in `...` are merged with `opts_collator`
    if (!missing(...))
        opts_collator <- do.call(stri_opts_collator, as.list(c(opts_collator, ...)))
    .Call(C_stri_replace_last_coll, str, pattern, replacement, opts_collator)
}


#' @export
#' @rdname stri_replace
stri_replace_all_fixed <- function(str, pattern, replacement,
    vectorize_all = TRUE, vectorise_all = vectorize_all, ..., opts_fixed = NULL)
{
    # an explicitly supplied British-spelling alias takes precedence
    if (!missing(vectorise_all))
        vectorize_all <- vectorise_all
    # extra arguments in `...` are merged with `opts_fixed`
    if (!missing(...))
        opts_fixed <- do.call(stri_opts_fixed, as.list(c(opts_fixed, ...)))
    .Call(C_stri_replace_all_fixed, str, pattern, replacement, vectorize_all, opts_fixed)
}


#' @export
#' @rdname stri_replace
stri_replace_first_fixed <- function(str, pattern, replacement, ..., opts_fixed = NULL)
{
    # extra arguments in `...` are merged with `opts_fixed`
    if (!missing(...))
        opts_fixed <- do.call(stri_opts_fixed, as.list(c(opts_fixed, ...)))
    .Call(C_stri_replace_first_fixed, str, pattern, replacement, opts_fixed)
}


#' @export
#' @rdname stri_replace
stri_replace_last_fixed <- function(str, pattern, replacement, ..., opts_fixed = NULL)
{
    # extra arguments in `...` are merged with `opts_fixed`
    if (!missing(...))
        opts_fixed <- do.call(stri_opts_fixed, as.list(c(opts_fixed, ...)))
    .Call(C_stri_replace_last_fixed, str, pattern, replacement, opts_fixed)
}


#' @export
#' @rdname stri_replace
stri_replace_all_regex <- function(str, pattern, replacement,
    vectorize_all = TRUE, vectorise_all = vectorize_all, ..., opts_regex = NULL)
{
    # an explicitly supplied British-spelling alias takes precedence
    if (!missing(vectorise_all))
        vectorize_all <- vectorise_all
    # extra arguments in `...` are merged with `opts_regex`
    if (!missing(...))
        opts_regex <- do.call(stri_opts_regex, as.list(c(opts_regex, ...)))
    .Call(C_stri_replace_all_regex, str, pattern, replacement, vectorize_all, opts_regex)
}


#' @export
#' @rdname stri_replace
stri_replace_first_regex <- function(str, pattern, replacement, ..., opts_regex = NULL)
{
    # extra arguments in `...` are merged with `opts_regex`
    if (!missing(...))
        opts_regex <- do.call(stri_opts_regex, as.list(c(opts_regex, ...)))
    .Call(C_stri_replace_first_regex, str, pattern, replacement, opts_regex)
}


#' @export
#' @rdname stri_replace
stri_replace_last_regex <- function(str, pattern, replacement, ..., opts_regex = NULL)
{
    # extra arguments in `...` are merged with `opts_regex`
    if (!missing(...))
        opts_regex <- do.call(stri_opts_regex, as.list(c(opts_regex, ...)))
    .Call(C_stri_replace_last_regex, str, pattern, replacement, opts_regex)
}


#' Convert gsub-Style Replacement Strings
#'
#' @description
#' Converts \code{\link[base]{gsub}}-style replacement strings
#' to those which can be used in \code{\link{stri_replace}}.
#' In particular, \code{$} becomes \code{\\$} and \code{\\1} becomes \code{$1}.
#'
#' @param x character vector
#'
#' @return Returns a character vector.
#'
#' @family search_replace
#' @export
stri_replace_rstr <- function(x)
{
    .Call(C_stri_replace_rstr, x)
}