1
2#' Compare two character vectors elementwise
3#'
4#' Its printed output is similar to calling `diff -u` at the command
5#' line.
6#'
7#' @param old First character vector.
8#' @param new Second character vector.
9#' @param max_dist Maximum distance to consider, or `Inf` for no limit.
10#'   If the LCS edit distance is larger than this, then the function
11#'   throws an error with class `"cli_diff_max_dist"`. (If you specify
12#'   `Inf` the real limit is `.Machine$integer.max` but to reach this the
13#'   function would have to run a very long time.)
14#' @return A list that is a `cli_diff_chr` object, with a `format()` and a
15#' `print()` method. You can also access its members:
16#'  * `old` and `new` are the original inputs,
17#'  * `lcs` is a data frame of LCS edit that transform `old` into `new`.
18#'
19#' The `lcs` data frame has the following columns:
20#' * `operation`: one of `"match"`, `"delete"` or `"insert"`.
21#' * `offset`: offset in `old` for matches and deletions, offset in `new`
22#'   for insertions.
23#' * `length`: length of the operation, i.e. number of matching, deleted
24#'   or inserted elements.
25#' * `old_offset`: offset in `old` _after_ the operation.
26#' * `new_offset`: offset in `new` _after_ the operation.
27#'
28#' @family diff functions in cli
29#' @seealso The diffobj package for a much more comprehensive set of
30#' `diff`-like tools.
31#' @export
32#' @examples
33#' letters2 <- c("P", "R", "E", letters, "P", "O", "S", "T")
34#' letters2[11:16] <- c("M", "I", "D", "D", "L", "E")
35#' diff_chr(letters, letters2)
36
37diff_chr <- function(old, new, max_dist = Inf) {
38  stopifnot(
39    is.character(old),
40    is.character(new),
41    max_dist == Inf || is_count(max_dist)
42  )
43  max_dist2 <- as_max_dist(max_dist)
44
45  lcs <- .Call(clic_diff_chr, old, new, max_dist2)
46
47  if (max_dist2 != 0 && lcs[[4]] == max_dist2) {
48    cnd <- structure(
49      list(
50        message = paste0("diff edit distance is larger than ", max_dist),
51        max_dist = max_dist
52      ),
53      class = c("cli_diff_max_dist", "error", "condition")
54    )
55    stop(cnd)
56  }
57
58  op <- c("match", "delete", "insert")[lcs[[1]]]
59  lcs <- data.frame(
60    stringsAsFactors = FALSE,
61    operation = op,
62    offset = lcs[[2]],
63    length = lcs[[3]],
64    old_offset = cumsum(ifelse(op == "insert", 0, lcs[[3]])),
65    new_offset = cumsum(ifelse(op == "delete", 0, lcs[[3]]))
66  )
67
68  ret <- structure(
69    list(old = old, new = new, lcs = lcs),
70    class = c("cli_diff_chr", "cli_diff", "list")
71  )
72
73  ret
74}
75
76#' Compare two character strings, character by character
77#'
78#' Characters are defined by UTF-8 graphemes.
79#'
80#' @param old First string, must not be `NA`.
81#' @param new Second string, must not be `NA`.
82#' @inheritParams diff_chr
83#' @return A list that is a `cli_diff_str` object and also a
84#'   `cli_diff_chr` object, see [diff_str] for the details about its
85#'   structure.
86#'
87#' @family diff functions in cli
88#' @seealso The diffobj package for a much more comprehensive set of
89#' `diff`-like tools.
90#'
91#' @export
92#' @examples
93#' str1 <- "abcdefghijklmnopqrstuvwxyz"
94#' str2 <- "PREabcdefgMIDDLEnopqrstuvwxyzPOST"
95#' diff_str(str1, str2)
96
97diff_str <- function(old, new, max_dist = Inf) {
98  stopifnot(
99    is_string(old),
100    is_string(new)
101    # max_dist is checked in diff_chr
102  )
103
104  old1 <- utf8_graphemes(old)[[1]]
105  new1 <- utf8_graphemes(new)[[1]]
106
107  ret <- diff_chr(old1, new1, max_dist)
108
109  class(ret) <- c("cli_diff_str", class(ret))
110
111  ret
112}
113
114#' @export
115
116format.cli_diff_chr <- function(x, context = 3L, ...) {
117  stopifnot(context == Inf || is_count(context))
118  if (length(list(...)) > 0) {
119    warning("Extra arguments were ignored in `format.cli_diff_chr()`.")
120  }
121
122  chunks <- get_diff_chunks(x$lcs, context = context)
123  out <- lapply(
124    seq_len(nrow(chunks)),
125    format_chunk,
126    x = x,
127    chunks = chunks,
128    context = context
129  )
130
131  ret <- as.character(unlist(out))
132  if (context == Inf && length(ret) > 0) ret <- ret[-1]
133
134  ret
135}
136
137get_diff_chunks <- function(lcs, context = 3L) {
138  # the number of chunks is the number of non-matching sequences if
139  # context == 0, but short matching parts do not separate chunks
140  runs <- rle(lcs$operation != "match" | lcs$length <= 2 * context)
141  nchunks <- sum(runs$values)
142
143  # special case for a single match chunk
144  if (nrow(lcs) == 1 && lcs$operation == "match") {
145    nchunks <- if (context == Inf) 1 else 0
146  }
147
148  chunks <- data.frame(
149    op_begin   = integer(nchunks),    # first op in chunk
150    op_length  = integer(nchunks),    # number of operations in chunk
151    old_begin  = integer(nchunks),    # first line from `old` in chunk
152    old_length = integer(nchunks),    # number of lines from `old` in chunk
153    new_begin  = integer(nchunks),    # first line from `new` in chunk
154    new_length = integer(nchunks)     # number of lines from `new` in chunk
155  )
156
157  if (nchunks == 0) return(chunks)
158
159  # infer some data about the original diff input
160  old_off <- c(0, lcs$old_offset)
161  new_off <- c(0, lcs$new_offset)
162  old_size <- old_off[length(old_off)]
163  new_size <- new_off[length(new_off)]
164  old_empty <- old_size == 0
165  new_empty <- new_size == 0
166
167  # avoid working with Inf
168  if (context == Inf) context <- max(old_size, new_size)
169
170  # chunk starts at operation number sum(length) before it, plus 1, but
171  # at the end we change this to include the context chunks are well
172  chunks$op_begin  <- c(0, cumsum(runs$length))[which(runs$values)] + 1
173  chunks$op_length <- runs$lengths[runs$values]
174
175  # `old` positions are from `old_off`, but need to fix the boundaries
176  chunks$old_begin <- old_off[chunks$op_begin] - context + 1
177  chunks$old_begin[chunks$old_begin <= 1] <- if (old_empty) 0 else 1
178  old_end <- old_off[chunks$op_begin + chunks$op_length] + context
179  old_end[old_end > old_size] <- old_size
180  chunks$old_length <- old_end - chunks$old_begin + 1
181
182  # `new` positions are similar
183  chunks$new_begin <- new_off[chunks$op_begin] - context + 1
184  chunks$new_begin[chunks$new_begin <= 1] <- if (new_empty) 0 else 1
185  new_end <- new_off[chunks$op_begin + chunks$op_length] + context
186  new_end[new_end > new_size] <- new_size
187  chunks$new_length <- new_end - chunks$new_begin + 1
188
189  # change to include context chunks
190  if (context > 0) {
191    # calculae the end before updating the begin
192    op_end <- chunks$op_begin + chunks$op_length - 1 + 1
193    op_end[op_end > nrow(lcs)] <- nrow(lcs)
194    chunks$op_begin <- chunks$op_begin - 1
195    chunks$op_begin[chunks$op_begin == 0] <- 1
196    chunks$op_length <- op_end - chunks$op_begin + 1
197  }
198
199  chunks
200}
201
202format_chunk <- function(x, chunks, num, context) {
203  hdr <- paste0(
204    "@@ -",
205    chunks$old_begin[num],
206    if ((l <- chunks$old_length[num]) != 1) paste0(",", l),
207    " +",
208    chunks$new_begin[num],
209    if ((l <- chunks$new_length[num]) != 1) paste0(",", l),
210    " @@"
211  )
212
213  from <- chunks$op_begin[num]
214  to <- chunks$op_begin[num] + chunks$op_length[num] - 1
215
216  lines <- lapply(from:to, function(i) {
217    op <- x$lcs$operation[i]
218    off <- x$lcs$offset[i]
219    len <- x$lcs$length[i]
220    if (op == "match") {
221      if (len > context) {
222        if (i == from) {
223          # start later
224          off <- off + len - context
225          len <- context
226        } else {
227          # finish earlier
228          len <- context
229        }
230      }
231      paste0(" ", x$old[off + 1:len])
232
233    } else if (op == "delete") {
234      col_blue(paste0("-", x$old[off + 1:len]))
235
236    } else if (op == "insert") {
237      col_green(paste0("+", x$new[off + 1:len]))
238    }
239  })
240  c(hdr, lines)
241}
242
243#' @export
244
245print.cli_diff_chr <- function(x, ...) {
246  writeLines(format(x, ...))
247}
248
249#' @export
250
251format.cli_diff_str <- function(x, ...) {
252  if (length(list(...)) > 0) {
253    warning("Extra arguments were ignored in `format.cli_diff_chr()`.")
254  }
255
256  if (num_ansi_colors() == 1) {
257    format_diff_str_nocolor(x, ...)
258  } else {
259    format_diff_str_color(x, ...)
260  }
261}
262
263format_diff_str_color <- function(x, ...) {
264  out <- lapply(seq_len(nrow(x$lcs)), function(i) {
265    op <- x$lcs$operation[i]
266    off <- x$lcs$offset[i]
267    len <- x$lcs$length[i]
268    if (op == "match") {
269      paste0(x$old[off + 1:len], collapse = "")
270
271    } else if (op == "delete") {
272      bg_blue(col_black(paste0(x$old[off + 1:len], collapse = "")))
273
274    } else if (op == "insert") {
275      bg_green(col_black(paste0(x$new[off + 1:len], collapse = "")))
276    }
277  })
278
279  paste(out, collapse = "")
280}
281
282format_diff_str_nocolor <- function(x, ...) {
283  out <- lapply(seq_len(nrow(x$lcs)), function(i) {
284    op <- x$lcs$operation[i]
285    off <- x$lcs$offset[i]
286    len <- x$lcs$length[i]
287    if (op == "match") {
288      paste0(x$old[off + 1:len], collapse = "")
289
290    } else if (op == "delete") {
291      paste0(c("[-", x$old[off + 1:len], "-]"), collapse = "")
292
293    } else if (op == "insert") {
294      paste0(c("{+", x$new[off + 1:len], "+}"), collapse = "")
295    }
296  })
297
298  paste(out, collapse = "")
299}
300
301as_max_dist <- function(max_dist) {
302  if (max_dist == Inf) {
303    0L
304  } else {
305    as.integer(max_dist + 1L)
306  }
307}
308