1 2#' Compare two character vectors elementwise 3#' 4#' Its printed output is similar to calling `diff -u` at the command 5#' line. 6#' 7#' @param old First character vector. 8#' @param new Second character vector. 9#' @param max_dist Maximum distance to consider, or `Inf` for no limit. 10#' If the LCS edit distance is larger than this, then the function 11#' throws an error with class `"cli_diff_max_dist"`. (If you specify 12#' `Inf` the real limit is `.Machine$integer.max` but to reach this the 13#' function would have to run a very long time.) 14#' @return A list that is a `cli_diff_chr` object, with a `format()` and a 15#' `print()` method. You can also access its members: 16#' * `old` and `new` are the original inputs, 17#' * `lcs` is a data frame of LCS edit that transform `old` into `new`. 18#' 19#' The `lcs` data frame has the following columns: 20#' * `operation`: one of `"match"`, `"delete"` or `"insert"`. 21#' * `offset`: offset in `old` for matches and deletions, offset in `new` 22#' for insertions. 23#' * `length`: length of the operation, i.e. number of matching, deleted 24#' or inserted elements. 25#' * `old_offset`: offset in `old` _after_ the operation. 26#' * `new_offset`: offset in `new` _after_ the operation. 27#' 28#' @family diff functions in cli 29#' @seealso The diffobj package for a much more comprehensive set of 30#' `diff`-like tools. 31#' @export 32#' @examples 33#' letters2 <- c("P", "R", "E", letters, "P", "O", "S", "T") 34#' letters2[11:16] <- c("M", "I", "D", "D", "L", "E") 35#' diff_chr(letters, letters2) 36 37diff_chr <- function(old, new, max_dist = Inf) { 38 stopifnot( 39 is.character(old), 40 is.character(new), 41 max_dist == Inf || is_count(max_dist) 42 ) 43 max_dist2 <- as_max_dist(max_dist) 44 45 lcs <- .Call(clic_diff_chr, old, new, max_dist2) 46 47 if (max_dist2 != 0 && lcs[[4]] == max_dist2) { 48 cnd <- structure( 49 list( 50 message = paste0("diff edit distance is larger than ", max_dist), 51 max_dist = max_dist 52 ), 53 class = c("cli_diff_max_dist", "error", "condition") 54 ) 55 stop(cnd) 56 } 57 58 op <- c("match", "delete", "insert")[lcs[[1]]] 59 lcs <- data.frame( 60 stringsAsFactors = FALSE, 61 operation = op, 62 offset = lcs[[2]], 63 length = lcs[[3]], 64 old_offset = cumsum(ifelse(op == "insert", 0, lcs[[3]])), 65 new_offset = cumsum(ifelse(op == "delete", 0, lcs[[3]])) 66 ) 67 68 ret <- structure( 69 list(old = old, new = new, lcs = lcs), 70 class = c("cli_diff_chr", "cli_diff", "list") 71 ) 72 73 ret 74} 75 76#' Compare two character strings, character by character 77#' 78#' Characters are defined by UTF-8 graphemes. 79#' 80#' @param old First string, must not be `NA`. 81#' @param new Second string, must not be `NA`. 82#' @inheritParams diff_chr 83#' @return A list that is a `cli_diff_str` object and also a 84#' `cli_diff_chr` object, see [diff_str] for the details about its 85#' structure. 86#' 87#' @family diff functions in cli 88#' @seealso The diffobj package for a much more comprehensive set of 89#' `diff`-like tools. 90#' 91#' @export 92#' @examples 93#' str1 <- "abcdefghijklmnopqrstuvwxyz" 94#' str2 <- "PREabcdefgMIDDLEnopqrstuvwxyzPOST" 95#' diff_str(str1, str2) 96 97diff_str <- function(old, new, max_dist = Inf) { 98 stopifnot( 99 is_string(old), 100 is_string(new) 101 # max_dist is checked in diff_chr 102 ) 103 104 old1 <- utf8_graphemes(old)[[1]] 105 new1 <- utf8_graphemes(new)[[1]] 106 107 ret <- diff_chr(old1, new1, max_dist) 108 109 class(ret) <- c("cli_diff_str", class(ret)) 110 111 ret 112} 113 114#' @export 115 116format.cli_diff_chr <- function(x, context = 3L, ...) { 117 stopifnot(context == Inf || is_count(context)) 118 if (length(list(...)) > 0) { 119 warning("Extra arguments were ignored in `format.cli_diff_chr()`.") 120 } 121 122 chunks <- get_diff_chunks(x$lcs, context = context) 123 out <- lapply( 124 seq_len(nrow(chunks)), 125 format_chunk, 126 x = x, 127 chunks = chunks, 128 context = context 129 ) 130 131 ret <- as.character(unlist(out)) 132 if (context == Inf && length(ret) > 0) ret <- ret[-1] 133 134 ret 135} 136 137get_diff_chunks <- function(lcs, context = 3L) { 138 # the number of chunks is the number of non-matching sequences if 139 # context == 0, but short matching parts do not separate chunks 140 runs <- rle(lcs$operation != "match" | lcs$length <= 2 * context) 141 nchunks <- sum(runs$values) 142 143 # special case for a single match chunk 144 if (nrow(lcs) == 1 && lcs$operation == "match") { 145 nchunks <- if (context == Inf) 1 else 0 146 } 147 148 chunks <- data.frame( 149 op_begin = integer(nchunks), # first op in chunk 150 op_length = integer(nchunks), # number of operations in chunk 151 old_begin = integer(nchunks), # first line from `old` in chunk 152 old_length = integer(nchunks), # number of lines from `old` in chunk 153 new_begin = integer(nchunks), # first line from `new` in chunk 154 new_length = integer(nchunks) # number of lines from `new` in chunk 155 ) 156 157 if (nchunks == 0) return(chunks) 158 159 # infer some data about the original diff input 160 old_off <- c(0, lcs$old_offset) 161 new_off <- c(0, lcs$new_offset) 162 old_size <- old_off[length(old_off)] 163 new_size <- new_off[length(new_off)] 164 old_empty <- old_size == 0 165 new_empty <- new_size == 0 166 167 # avoid working with Inf 168 if (context == Inf) context <- max(old_size, new_size) 169 170 # chunk starts at operation number sum(length) before it, plus 1, but 171 # at the end we change this to include the context chunks are well 172 chunks$op_begin <- c(0, cumsum(runs$length))[which(runs$values)] + 1 173 chunks$op_length <- runs$lengths[runs$values] 174 175 # `old` positions are from `old_off`, but need to fix the boundaries 176 chunks$old_begin <- old_off[chunks$op_begin] - context + 1 177 chunks$old_begin[chunks$old_begin <= 1] <- if (old_empty) 0 else 1 178 old_end <- old_off[chunks$op_begin + chunks$op_length] + context 179 old_end[old_end > old_size] <- old_size 180 chunks$old_length <- old_end - chunks$old_begin + 1 181 182 # `new` positions are similar 183 chunks$new_begin <- new_off[chunks$op_begin] - context + 1 184 chunks$new_begin[chunks$new_begin <= 1] <- if (new_empty) 0 else 1 185 new_end <- new_off[chunks$op_begin + chunks$op_length] + context 186 new_end[new_end > new_size] <- new_size 187 chunks$new_length <- new_end - chunks$new_begin + 1 188 189 # change to include context chunks 190 if (context > 0) { 191 # calculae the end before updating the begin 192 op_end <- chunks$op_begin + chunks$op_length - 1 + 1 193 op_end[op_end > nrow(lcs)] <- nrow(lcs) 194 chunks$op_begin <- chunks$op_begin - 1 195 chunks$op_begin[chunks$op_begin == 0] <- 1 196 chunks$op_length <- op_end - chunks$op_begin + 1 197 } 198 199 chunks 200} 201 202format_chunk <- function(x, chunks, num, context) { 203 hdr <- paste0( 204 "@@ -", 205 chunks$old_begin[num], 206 if ((l <- chunks$old_length[num]) != 1) paste0(",", l), 207 " +", 208 chunks$new_begin[num], 209 if ((l <- chunks$new_length[num]) != 1) paste0(",", l), 210 " @@" 211 ) 212 213 from <- chunks$op_begin[num] 214 to <- chunks$op_begin[num] + chunks$op_length[num] - 1 215 216 lines <- lapply(from:to, function(i) { 217 op <- x$lcs$operation[i] 218 off <- x$lcs$offset[i] 219 len <- x$lcs$length[i] 220 if (op == "match") { 221 if (len > context) { 222 if (i == from) { 223 # start later 224 off <- off + len - context 225 len <- context 226 } else { 227 # finish earlier 228 len <- context 229 } 230 } 231 paste0(" ", x$old[off + 1:len]) 232 233 } else if (op == "delete") { 234 col_blue(paste0("-", x$old[off + 1:len])) 235 236 } else if (op == "insert") { 237 col_green(paste0("+", x$new[off + 1:len])) 238 } 239 }) 240 c(hdr, lines) 241} 242 243#' @export 244 245print.cli_diff_chr <- function(x, ...) { 246 writeLines(format(x, ...)) 247} 248 249#' @export 250 251format.cli_diff_str <- function(x, ...) { 252 if (length(list(...)) > 0) { 253 warning("Extra arguments were ignored in `format.cli_diff_chr()`.") 254 } 255 256 if (num_ansi_colors() == 1) { 257 format_diff_str_nocolor(x, ...) 258 } else { 259 format_diff_str_color(x, ...) 260 } 261} 262 263format_diff_str_color <- function(x, ...) { 264 out <- lapply(seq_len(nrow(x$lcs)), function(i) { 265 op <- x$lcs$operation[i] 266 off <- x$lcs$offset[i] 267 len <- x$lcs$length[i] 268 if (op == "match") { 269 paste0(x$old[off + 1:len], collapse = "") 270 271 } else if (op == "delete") { 272 bg_blue(col_black(paste0(x$old[off + 1:len], collapse = ""))) 273 274 } else if (op == "insert") { 275 bg_green(col_black(paste0(x$new[off + 1:len], collapse = ""))) 276 } 277 }) 278 279 paste(out, collapse = "") 280} 281 282format_diff_str_nocolor <- function(x, ...) { 283 out <- lapply(seq_len(nrow(x$lcs)), function(i) { 284 op <- x$lcs$operation[i] 285 off <- x$lcs$offset[i] 286 len <- x$lcs$length[i] 287 if (op == "match") { 288 paste0(x$old[off + 1:len], collapse = "") 289 290 } else if (op == "delete") { 291 paste0(c("[-", x$old[off + 1:len], "-]"), collapse = "") 292 293 } else if (op == "insert") { 294 paste0(c("{+", x$new[off + 1:len], "+}"), collapse = "") 295 } 296 }) 297 298 paste(out, collapse = "") 299} 300 301as_max_dist <- function(max_dist) { 302 if (max_dist == Inf) { 303 0L 304 } else { 305 as.integer(max_dist + 1L) 306 } 307} 308