1#' Match function 2#' 3#' @param data character vector to match against 4#' @param pattern regular expression to use for matching 5#' @param global use global matching 6#' @param options regular expression options 7#' @param locations rather than returning the values of the matched (or 8#' captured) string, return a \code{data.frame} of the match locations in the 9#' string. 10#' @param ... options passed to regexpr or gregexpr 11#' @return if no captures, returns a logical vector the same length as the 12#' input character vector specifying if the relevant value matched or not. If 13#' there are captures in the regular expression, returns a \code{data.frame} with a 14#' column for each capture group. If \code{global} is \code{TRUE}, returns a 15#' list of \code{data.frame}s. 16#' @seealso \code{\link{regexp}} Section "Perl-like Regular Expressions" for a 17#' discussion of the supported options 18#' @examples 19#' string <- c("this is a", "test string") 20#' re_matches(string, rex("test")) # FALSE FALSE 21#' 22#' # named capture 23#' re_matches(string, rex(capture(alphas, name = "first_word"), space, 24#' capture(alphas, name = "second_word"))) 25#' # first_word second_word 26#' # 1 this is 27#' # 2 test string 28#' 29#' # capture returns NA when it fails to match 30#' re_matches(string, rex(capture("test"))) 31#' # 1 32#' # 1 test 33#' # 2 <NA> 34#' @aliases matches m 35#' @export re_matches matches m 36re_matches <- matches <- m <- function(data, pattern, global = FALSE, options = NULL, locations = FALSE, ...) { 37 38 pattern <- add_options(pattern, options) 39 40 process_matches <- function(match, string) { 41 42 if(no_capture(match)) { 43 44 # if no capture and no location just return if the regex matched 45 if(!locations) { 46 return(match != -1L) 47 } 48 49 # else return a data frame of the start and end locations 50 match[ match == -1L ] <- NA_integer_ 51 starts <- match 52 attributes(starts) <- NULL 53 54 lengths <- attr(match, "match.length") 55 ends <- starts + lengths - 1L 56 57 return(data.frame(start = starts, end = ends)) 58 } 59 60 # if a capture return a data frame with the capture results for each string 61 starts <- attr(match, "capture.start") 62 lengths <- attr(match, "capture.length") 63 ends <- starts + lengths - 1L 64 65 not_matched <- starts == -1L 66 67 strings <- substring(string, starts, ends) 68 69 strings[not_matched] <- NA_character_ 70 71 res <- matrix(ncol = ncol(starts), strings) 72 73 nms <- auto_name(attr(match, "capture.names")) 74 75 if (!locations) { 76 colnames(res) <- nms 77 return(as.data.frame(res, stringsAsFactors = FALSE, check.names = FALSE)) 78 } 79 80 starts[not_matched] <- NA_integer_ 81 82 ends[not_matched] <- NA_integer_ 83 84 indexes <- unlist(lapply(seq_len(ncol(res)), function(x) { 85 seq(x, by = ncol(res), length.out = 3) 86 })) 87 88 full <- data.frame(res, starts, ends, stringsAsFactors = FALSE, check.names = FALSE)[, indexes, drop = FALSE] 89 full_names <- unlist(Map(function(name) c(name, paste(sep=".", name, c("start", "end"))), nms, USE.NAMES=F)) 90 colnames(full) <- full_names 91 92 full 93 } 94 95 if(global %==% TRUE) { 96 mapply(process_matches, gregexpr(pattern = pattern, data, perl = TRUE, ...), data, SIMPLIFY = FALSE) 97 } 98 else { 99 process_matches(regexpr(pattern = pattern, data, perl = TRUE, ...), data) 100 } 101} 102 103#' Substitute regular expressions in a string with another string. 104#' 105#' @param data character vector to substitute 106#' @param pattern regular expression to match 107#' @param replacement replacement text to use 108#' @param global substitute all occurrences 109#' @param options option flags 110#' @param ... options passed to sub or gsub 111#' @seealso \code{\link{regexp}} Section "Perl-like Regular Expressions" for a 112#' discussion of the supported options 113#' @examples 114#' string <- c("this is a Test", "string") 115#' re_substitutes(string, "test", "not a test", options = "insensitive") 116#' re_substitutes(string, "i", "x", global = TRUE) 117#' re_substitutes(string, "(test)", "not a \\1", options = "insensitive") 118#' @aliases substitutes s 119#' @export re_substitutes substitutes s 120re_substitutes <- substitutes <- s <- function(data, pattern, replacement, global = FALSE, options = NULL, ...) { 121 pattern <- add_options(pattern, options) 122 method <- if (isTRUE(global)) gsub else sub 123 method(x = data, pattern = pattern, replacement = replacement, perl = TRUE, ...) 124} 125 126add_options <- function(pattern, options) { 127 if (!is.null(options)) { 128 options <- match_args(options, names(option_map)) 129 p("(?", p(option_map[options]), ")", pattern) 130 } 131 else { 132 pattern 133 } 134} 135 136match_args <- function(arg, choices) { 137 matches <- pmatch(arg, choices) 138 if (any(is.na(matches))) { 139 stop(gettextf("'arg' should be one of %s", paste(dQuote(choices), 140 collapse = ", ")), domain = NA) 141 } 142 choices[matches] 143} 144 145option_map <- c( 146 "insensitive" = "i", 147 "multi-line" = "m", 148 "single-line" = "s", 149 "extended" = "x", 150 "ungreedy" = "U" 151 ) 152 153no_capture <- function(match) { 154 is.null(attr(match, "capture.start", exact = TRUE)) 155} 156 157auto_name <- function(names) { 158 missing <- names == "" 159 if (all(!missing)) { 160 return(names) 161 } 162 names[missing] <- seq_along(names)[missing] 163 names 164} 165