1# kate: default-dictionary en_US
2
3## This file is part of the 'stringi' package for R.
4## Copyright (c) 2013-2021, Marek Gagolewski <https://www.gagolewski.com>
5## All rights reserved.
6##
7## Redistribution and use in source and binary forms, with or without
8## modification, are permitted provided that the following conditions are met:
9##
10## 1. Redistributions of source code must retain the above copyright notice,
11## this list of conditions and the following disclaimer.
12##
13## 2. Redistributions in binary form must reproduce the above copyright notice,
14## this list of conditions and the following disclaimer in the documentation
15## and/or other materials provided with the distribution.
16##
17## 3. Neither the name of the copyright holder nor the names of its
18## contributors may be used to endorse or promote products derived from
19## this software without specific prior written permission.
20##
21## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22## 'AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
23## BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24## FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
30## OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33
34#' @title
35#' Replace Pattern Occurrences
36#'
37#' @description
38#' These functions replace, with the given replacement string, every/first/last
39#' substring of the input that matches the specified \code{pattern}.
40#'
41#' @details
42#' By default, all the functions are vectorized over
43#' \code{str}, \code{pattern}, \code{replacement} (with recycling
44#' of the elements in the shorter vector if necessary).
45#' Input that is not part of any match is left unchanged;
46#' each match is replaced in the result by the replacement string.
47#'
48#' However, for \code{stri_replace_all*}, if \code{vectorize_all} is \code{FALSE},
49#' then each substring matching any of the supplied \code{pattern}s
50#' is replaced by a corresponding \code{replacement} string.
51#' In such a case, the vectorization is over \code{str},
52#' and - independently - over \code{pattern} and \code{replacement}.
53#' In other words, this is equivalent to something like
#' \code{for (i in 1:npatterns) str <- stri_replace_all(str, pattern[i], replacement[i])}.
55#' Note that you must set \code{length(pattern) >= length(replacement)}.
56#'
57#' In case of \code{stri_replace_*_regex},
58#' the replacement string may contain references to capture groups
59#' (in round parentheses).
60#' References are of the form \code{$n}, where \code{n} is the number
61#' of the capture group (\code{$1} denotes the first group).
62#' For the literal \code{$},
63#' escape it with a backslash.
64#' Moreover, \code{${name}} are used for named capture groups.
65#'
66#' Note that \code{stri_replace_last_regex} searches from start to end,
67#' but skips overlapping matches, see the example below.
68#'
69#' \code{stri_replace}, \code{stri_replace_all}, \code{stri_replace_first},
70#' and \code{stri_replace_last} are convenience functions; they just call
71#' \code{stri_replace_*_*} variants, depending on the arguments used.
72#'
73#' If you wish to remove white-spaces from the start or end
74#' of a string, see \code{\link{stri_trim}}.
75#'
76#' @param str character vector; strings to search in
77#' @param pattern,regex,fixed,coll,charclass character vector;
78#'     search patterns; for more details refer to \link{stringi-search}
79#' @param replacement character vector with replacements for matched patterns
80#' @param opts_collator,opts_fixed,opts_regex a named list used to tune up
81#' the search engine's settings; see
82#' \code{\link{stri_opts_collator}}, \code{\link{stri_opts_fixed}},
83#' and \code{\link{stri_opts_regex}}, respectively; \code{NULL}
84#' for the defaults
85#' @param merge single logical value;
86#' should consecutive matches be merged into one string;
87#' \code{stri_replace_all_charclass} only
88#' @param vectorize_all single logical value;
89#' should each occurrence of a pattern in every string
90#' be replaced by a corresponding replacement string?;
91#' \code{stri_replace_all_*} only
92#' @param vectorise_all alias of \code{vectorize_all}
93#' @param mode single string;
94#' one of: \code{'first'} (the default), \code{'all'}, \code{'last'}
95#' @param ... supplementary arguments passed to the underlying functions,
96#' including additional settings for \code{opts_collator}, \code{opts_regex},
97#' \code{opts_fixed}, and so on
98#'
99#' @return All the functions return a character vector.
100#'
101#' @examples
102#' stri_replace_all_charclass('aaaa', '[a]', 'b', merge=c(TRUE, FALSE))
103#'
104#' stri_replace_all_charclass('a\nb\tc   d', '\\p{WHITE_SPACE}', ' ')
105#' stri_replace_all_charclass('a\nb\tc   d', '\\p{WHITE_SPACE}', ' ', merge=TRUE)
106#'
107#' s <- 'Lorem ipsum dolor sit amet, consectetur adipisicing elit.'
108#' stri_replace_all_fixed(s, ' ', '#')
109#' stri_replace_all_fixed(s, 'o', '0')
110#'
111#' stri_replace_all_fixed(c('1', 'NULL', '3'), 'NULL', NA)
112#'
113#' stri_replace_all_regex(s, ' .*? ', '#')
114#' stri_replace_all_regex(s, '(el|s)it', '1234')
115#' stri_replace_all_regex('abaca', 'a', c('!', '*'))
116#' stri_replace_all_regex('123|456|789', '(\\p{N}).(\\p{N})', '$2-$1')
117#' stri_replace_all_regex(c('stringi R', 'REXAMINE', '123'), '( R|R.)', ' r ')
118#'
119#' # named capture groups are available since ICU 55
120#' \dontrun{
121#' stri_replace_all_regex('words 123 and numbers 456',
122#'    '(?<numbers>[0-9]+)', '!${numbers}!')
123#' }
124#'
125#' # Compare the results:
126#' stri_replace_all_fixed('The quick brown fox jumped over the lazy dog.',
127#'      c('quick', 'brown', 'fox'), c('slow',  'black', 'bear'), vectorize_all=TRUE)
128#' stri_replace_all_fixed('The quick brown fox jumped over the lazy dog.',
129#'      c('quick', 'brown', 'fox'), c('slow',  'black', 'bear'), vectorize_all=FALSE)
130#'
131#' # Compare the results:
132#' stri_replace_all_fixed('The quicker brown fox jumped over the lazy dog.',
133#'      c('quick', 'brown', 'fox'), c('slow',  'black', 'bear'), vectorize_all=FALSE)
134#' stri_replace_all_regex('The quicker brown fox jumped over the lazy dog.',
135#'      '\\b'%s+%c('quick', 'brown', 'fox')%s+%'\\b', c('slow',  'black', 'bear'), vectorize_all=FALSE)
136#'
137#' # Searching for the last occurrence:
138#' # Note the difference - regex searches left to right, with no overlaps.
139#' stri_replace_last_fixed("agAGA", "aga", "*", case_insensitive=TRUE)
140#' stri_replace_last_regex("agAGA", "aga", "*", case_insensitive=TRUE)
141#'
142#' @family search_replace
143#' @export
144#' @rdname stri_replace
stri_replace_all <- function(str, replacement, ..., regex, fixed, coll, charclass) {
    # Flag which of the four mutually exclusive pattern arguments
    # the caller actually supplied.
    supplied <- c(
        regex = !missing(regex),
        fixed = !missing(fixed),
        coll = !missing(coll),
        charclass = !missing(charclass)
    )

    # Exactly one search engine has to be selected.
    if (sum(supplied) != 1) {
        stop("you have to specify either `regex`, `fixed`, `coll`, or `charclass`")
    }

    # Delegate to the engine-specific variant; `...` is forwarded untouched.
    switch(names(supplied)[supplied],
        regex = stri_replace_all_regex(str, regex, replacement, ...),
        fixed = stri_replace_all_fixed(str, fixed, replacement, ...),
        coll = stri_replace_all_coll(str, coll, replacement, ...),
        charclass = stri_replace_all_charclass(str, charclass, replacement, ...)
    )
}
165
166
167#' @export
168#' @rdname stri_replace
stri_replace_first <- function(str, replacement, ..., regex, fixed, coll, charclass) {
    # Flag which of the four mutually exclusive pattern arguments
    # the caller actually supplied.
    supplied <- c(
        regex = !missing(regex),
        fixed = !missing(fixed),
        coll = !missing(coll),
        charclass = !missing(charclass)
    )

    # Exactly one search engine has to be selected.
    if (sum(supplied) != 1) {
        stop("you have to specify either `regex`, `fixed`, `coll`, or `charclass`")
    }

    # Delegate to the engine-specific variant; `...` is forwarded untouched.
    switch(names(supplied)[supplied],
        regex = stri_replace_first_regex(str, regex, replacement, ...),
        fixed = stri_replace_first_fixed(str, fixed, replacement, ...),
        coll = stri_replace_first_coll(str, coll, replacement, ...),
        charclass = stri_replace_first_charclass(str, charclass, replacement, ...)
    )
}
189
190
191#' @export
192#' @rdname stri_replace
stri_replace_last <- function(str, replacement, ..., regex, fixed, coll, charclass) {
    # Flag which of the four mutually exclusive pattern arguments
    # the caller actually supplied.
    supplied <- c(
        regex = !missing(regex),
        fixed = !missing(fixed),
        coll = !missing(coll),
        charclass = !missing(charclass)
    )

    # Exactly one search engine has to be selected.
    if (sum(supplied) != 1) {
        stop("you have to specify either `regex`, `fixed`, `coll`, or `charclass`")
    }

    # Delegate to the engine-specific variant; `...` is forwarded untouched.
    switch(names(supplied)[supplied],
        regex = stri_replace_last_regex(str, regex, replacement, ...),
        fixed = stri_replace_last_fixed(str, fixed, replacement, ...),
        coll = stri_replace_last_coll(str, coll, replacement, ...),
        charclass = stri_replace_last_charclass(str, charclass, replacement, ...)
    )
}
213
214
215#' @export
216#' @rdname stri_replace
stri_replace <- function(str, replacement, ..., regex, fixed, coll, charclass,
    mode = c("first", "all", "last")) {
    # `first` is listed first (and hence is the default)
    # for compatibility with stringr
    mode <- match.arg(mode)  # this is slow

    # The pattern arguments are forwarded by name; the selected convenience
    # function itself checks which of them were supplied.
    if (mode == "first") {
        stri_replace_first(str, replacement, ..., regex = regex,
            fixed = fixed, coll = coll, charclass = charclass)
    } else if (mode == "last") {
        stri_replace_last(str, replacement, ..., regex = regex,
            fixed = fixed, coll = coll, charclass = charclass)
    } else {
        # match.arg() leaves 'all' as the only remaining choice
        stri_replace_all(str, replacement, ..., regex = regex,
            fixed = fixed, coll = coll, charclass = charclass)
    }
}
231
232
233#' @export
234#' @rdname stri_replace
stri_replace_all_charclass <- function(str, pattern, replacement, merge = FALSE,
    vectorize_all = TRUE, vectorise_all = vectorize_all) {
    # `vectorise_all` is an alias; when given explicitly it overrides
    # `vectorize_all`.
    if (!missing(vectorise_all)) {
        vectorize_all <- vectorise_all
    }

    # The replacement itself is carried out by the native routine.
    .Call(
        C_stri_replace_all_charclass,
        str, pattern, replacement, merge, vectorize_all
    )
}
243
244
245#' @export
246#' @rdname stri_replace
stri_replace_first_charclass <- function(str, pattern, replacement) {
    # Thin wrapper: the arguments are handed over unmodified
    # to the native routine.
    .Call(
        C_stri_replace_first_charclass,
        str, pattern, replacement
    )
}
251
252
253#' @export
254#' @rdname stri_replace
stri_replace_last_charclass <- function(str, pattern, replacement) {
    # Thin wrapper: the arguments are handed over unmodified
    # to the native routine.
    .Call(
        C_stri_replace_last_charclass,
        str, pattern, replacement
    )
}
259
260
261#' @export
262#' @rdname stri_replace
stri_replace_all_coll <- function(str, pattern, replacement,
    vectorize_all = TRUE, vectorise_all = vectorize_all, ..., opts_collator = NULL) {
    # `vectorise_all` is an alias; when given explicitly it overrides
    # `vectorize_all`.
    if (!missing(vectorise_all)) {
        vectorize_all <- vectorise_all
    }

    # Extra arguments are combined with `opts_collator` and passed
    # through stri_opts_collator() to build the final settings.
    if (!missing(...)) {
        collator_args <- as.list(c(opts_collator, ...))
        opts_collator <- do.call(stri_opts_collator, collator_args)
    }

    .Call(
        C_stri_replace_all_coll,
        str, pattern, replacement, vectorize_all, opts_collator
    )
}
272
273
274#' @export
275#' @rdname stri_replace
stri_replace_first_coll <- function(str, pattern, replacement, ..., opts_collator = NULL) {
    # Extra arguments are combined with `opts_collator` and passed
    # through stri_opts_collator() to build the final settings.
    if (!missing(...)) {
        collator_args <- as.list(c(opts_collator, ...))
        opts_collator <- do.call(stri_opts_collator, collator_args)
    }

    .Call(
        C_stri_replace_first_coll,
        str, pattern, replacement, opts_collator
    )
}
282
283
284#' @export
285#' @rdname stri_replace
stri_replace_last_coll <- function(str, pattern, replacement, ..., opts_collator = NULL) {
    # Extra arguments are combined with `opts_collator` and passed
    # through stri_opts_collator() to build the final settings.
    if (!missing(...)) {
        collator_args <- as.list(c(opts_collator, ...))
        opts_collator <- do.call(stri_opts_collator, collator_args)
    }

    .Call(
        C_stri_replace_last_coll,
        str, pattern, replacement, opts_collator
    )
}
292
293
294#' @export
295#' @rdname stri_replace
stri_replace_all_fixed <- function(str, pattern, replacement,
    vectorize_all = TRUE, vectorise_all = vectorize_all, ..., opts_fixed = NULL) {
    # `vectorise_all` is an alias; when given explicitly it overrides
    # `vectorize_all`.
    if (!missing(vectorise_all)) {
        vectorize_all <- vectorise_all
    }

    # Extra arguments are combined with `opts_fixed` and passed
    # through stri_opts_fixed() to build the final settings.
    if (!missing(...)) {
        fixed_args <- as.list(c(opts_fixed, ...))
        opts_fixed <- do.call(stri_opts_fixed, fixed_args)
    }

    .Call(
        C_stri_replace_all_fixed,
        str, pattern, replacement, vectorize_all, opts_fixed
    )
}
305
306
307#' @export
308#' @rdname stri_replace
stri_replace_first_fixed <- function(str, pattern, replacement, ..., opts_fixed = NULL) {
    # Extra arguments are combined with `opts_fixed` and passed
    # through stri_opts_fixed() to build the final settings.
    if (!missing(...)) {
        fixed_args <- as.list(c(opts_fixed, ...))
        opts_fixed <- do.call(stri_opts_fixed, fixed_args)
    }

    .Call(
        C_stri_replace_first_fixed,
        str, pattern, replacement, opts_fixed
    )
}
315
316
317#' @export
318#' @rdname stri_replace
stri_replace_last_fixed <- function(str, pattern, replacement, ..., opts_fixed = NULL) {
    # Extra arguments are combined with `opts_fixed` and passed
    # through stri_opts_fixed() to build the final settings.
    if (!missing(...)) {
        fixed_args <- as.list(c(opts_fixed, ...))
        opts_fixed <- do.call(stri_opts_fixed, fixed_args)
    }

    .Call(
        C_stri_replace_last_fixed,
        str, pattern, replacement, opts_fixed
    )
}
325
326
327#' @export
328#' @rdname stri_replace
stri_replace_all_regex <- function(str, pattern, replacement,
    vectorize_all = TRUE, vectorise_all = vectorize_all, ..., opts_regex = NULL) {
    # `vectorise_all` is an alias; when given explicitly it overrides
    # `vectorize_all`.
    if (!missing(vectorise_all)) {
        vectorize_all <- vectorise_all
    }

    # Extra arguments are combined with `opts_regex` and passed
    # through stri_opts_regex() to build the final settings.
    if (!missing(...)) {
        regex_args <- as.list(c(opts_regex, ...))
        opts_regex <- do.call(stri_opts_regex, regex_args)
    }

    .Call(
        C_stri_replace_all_regex,
        str, pattern, replacement, vectorize_all, opts_regex
    )
}
338
339
340#' @export
341#' @rdname stri_replace
stri_replace_first_regex <- function(str, pattern, replacement, ..., opts_regex = NULL) {
    # Extra arguments are combined with `opts_regex` and passed
    # through stri_opts_regex() to build the final settings.
    if (!missing(...)) {
        regex_args <- as.list(c(opts_regex, ...))
        opts_regex <- do.call(stri_opts_regex, regex_args)
    }

    .Call(
        C_stri_replace_first_regex,
        str, pattern, replacement, opts_regex
    )
}
348
349
350#' @export
351#' @rdname stri_replace
stri_replace_last_regex <- function(str, pattern, replacement, ..., opts_regex = NULL) {
    # Extra arguments are combined with `opts_regex` and passed
    # through stri_opts_regex() to build the final settings.
    if (!missing(...)) {
        regex_args <- as.list(c(opts_regex, ...))
        opts_regex <- do.call(stri_opts_regex, regex_args)
    }

    .Call(
        C_stri_replace_last_regex,
        str, pattern, replacement, opts_regex
    )
}
358
359
360#' Convert gsub-Style Replacement Strings
361#'
362#' @description
#' Converts \code{\link[base]{gsub}}-style replacement strings
#' to those which can be used in \code{\link{stri_replace}}.
365#' In particular, \code{$} becomes \code{\\$} and \code{\\1} becomes \code{$1}.
366#'
367#' @param x character vector
368#'
369#' @return Returns a character vector.
370#'
371#' @family search_replace
372#' @export
stri_replace_rstr <- function(x) {
    # Thin wrapper: `x` is handed over unmodified to the native routine.
    .Call(
        C_stri_replace_rstr,
        x
    )
}
377