1# Copyright (C) 2021 Brodie Gaslam
2#
3# This file is part of "diffobj - Diffs for R Objects"
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 2 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
16
17#' Diffs for R Objects
18#'
19#' Generate a colorized diff of two R objects for an intuitive visualization of
20#' their differences.  See `vignette(package="diffobj", "diffobj")` for details.
21#'
22#' @import crayon
23#' @import methods
24#' @importFrom utils capture.output file_test packageVersion read.csv
25#' @importFrom stats ave frequency is.ts setNames
26#' @importFrom grDevices rgb
27#' @name diffobj-package
28#' @docType package
29
30NULL
31
32# Because all these functions are so similar, we have constructed them with a
33# function factory.  This allows us to easily maintain consistent formals during
34# initial development process when they have not been set in stone yet.
35
make_diff_fun <- function(capt_fun) {
  # Factory for the `diff*` method bodies.  Every generated function carries the
  # same formals; the only thing that varies is `capt_fun`, which is invoked at
  # the very end with the processed settings.

  # nocov start
  function(
    target, current,
    mode=gdo("mode"),
    context=gdo("context"),
    format=gdo("format"),
    brightness=gdo("brightness"),
    color.mode=gdo("color.mode"),
    word.diff=gdo("word.diff"),
    pager=gdo("pager"),
    guides=gdo("guides"),
    trim=gdo("trim"),
    rds=gdo("rds"),
    unwrap.atomic=gdo("unwrap.atomic"),
    max.diffs=gdo("max.diffs"),
    disp.width=gdo("disp.width"),
    ignore.white.space=gdo("ignore.white.space"),
    convert.hz.white.space=gdo("convert.hz.white.space"),
    tab.stops=gdo("tab.stops"),
    line.limit=gdo("line.limit"),
    hunk.limit=gdo("hunk.limit"),
    align=gdo("align"),
    style=gdo("style"),
    palette.of.styles=gdo("palette"),
    frame=par_frame(),
    interactive=gdo("interactive"),
    term.colors=gdo("term.colors"),
    tar.banner=NULL,
    cur.banner=NULL,
    strip.sgr=gdo("strip.sgr"),
    sgr.supported=gdo("sgr.supported"),
    extra=list()
  ) {
  # nocov end
    # Evaluation order matters in this preamble.  `frame` is touched before
    # anything else so that a defaulted `par_frame()` is called in this
    # context, and `target` / `current` are touched right after the call data
    # is recorded so that a missing argument errors here and not later.  The
    # bare symbols are deliberate: they evaluate the arguments without putting
    # an extra call on the stack.

    frame
    call.info <- extract_call(sys.calls(), frame)
    target
    current

    # Validate the arguments and resolve all the auto-selection ones into a
    # single settings object

    etc <- check_args(
      call=call.info$call,
      tar.exp=call.info$tar,
      cur.exp=call.info$cur,
      mode=mode,
      context=context,
      line.limit=line.limit,
      format=format,
      brightness=brightness,
      color.mode=color.mode,
      pager=pager,
      ignore.white.space=ignore.white.space,
      max.diffs=max.diffs,
      align=align,
      disp.width=disp.width,
      hunk.limit=hunk.limit,
      convert.hz.white.space=convert.hz.white.space,
      tab.stops=tab.stops,
      style=style,
      palette.of.styles=palette.of.styles,
      frame=frame,
      tar.banner=tar.banner,
      cur.banner=cur.banner,
      guides=guides,
      rds=rds,
      trim=trim,
      word.diff=word.diff,
      unwrap.atomic=unwrap.atomic,
      extra=extra,
      interactive=interactive,
      term.colors=term.colors,
      strip.sgr=strip.sgr,
      sgr.supported=sgr.supported,
      call.match=match.call()
    )
    # In rds mode either input may reference an RDS; let `get_rds` resolve each

    if(rds) {
      target <- get_rds(target)
      current <- get_rds(current)
    }
    # The block below, which forces crayon to whatever ansi status was chosen,
    # is currently disabled.  It sits after the arguments have been touched in
    # case someone passes `options(crayon.enabled=...)` as one of them.

    # old.crayon.opt <- options(
    #   crayon.enabled=
    #     is(etc.proc@style, "StyleAnsi") ||
    #     (!is(etc.proc@style, "StyleHtml") && etc.proc@sgr.supported)
    # )
    # on.exit(options(old.crayon.opt), add=TRUE)
    err.fun <- make_err_fun(sys.call())

    # Gutter data comes first since every width below is derived from it.  Per
    # the original notes, styles that are not base text styles are assumed to
    # have zero gutter and column padding, which may not always be correct.

    etc@gutter <- gutter_dat(etc)

    pad.w <- nchar2(etc@style@text@pad.col, sgr.supported=etc@sgr.supported)
    gutter.w <- etc@gutter@width

    # Neither the full nor the half line may be narrower than the minimum text
    # width plus the gutter

    min.line.w <- .min.width + gutter.w
    full.w <- max(etc@disp.width, min.line.w)
    half.w <- max(as.integer((etc@disp.width - pad.w) / 2), min.line.w)

    etc@line.width <- full.w
    etc@text.width <- full.w - gutter.w
    etc@line.width.half <- half.w
    etc@text.width.half <- half.w - gutter.w

    # An explicit side by side request means half-width settings are wanted
    # right away; an "auto" request on a display narrower than 80 columns is
    # settled as unified

    if(etc@mode == "auto") {
      if(etc@disp.width < 80L) etc@mode <- "unified"
    } else if(etc@mode == "sidebyside") {
      etc <- sideBySide(etc)
    }
    # Capture the text representations and diff them

    res <- capt_fun(target, current, etc=etc, err=err.fun, extra)
    res
  }
}
140#' Diff \code{print}ed Objects
141#'
142#' Runs the diff between the \code{print} or \code{show} output produced by
143#' \code{target} and \code{current}.  Given the extensive parameter list, this
144#' documentation page is intended as a reference for all the \code{diff*}
145#' methods.  For a high level introduction see \code{vignette("diffobj")}.
146#'
147#' Almost all aspects of how the diffs are computed and displayed are
148#' controllable through the \code{diff*} methods parameters.  This results in a
149#' lengthy parameter list, but in practice you should rarely need to adjust
150#' anything past the \code{color.mode} parameter.  Default values are specified
151#' as options so that users may configure diffs in a persistent manner.
152#' \code{\link{gdo}} is a shorthand function to access \code{diffobj} options.
153#'
154#' Parameter order after \code{color.mode} is not guaranteed.  Future versions
155#' of \code{diffobj} may add parameters and re-order existing parameters past
156#' \code{color.mode}.
157#'
158#' This and other \code{diff*} functions are S4 generics that dispatch on the
159#' \code{target} and \code{current} parameters.  Methods with signature
160#' \code{c("ANY", "ANY")} are defined and act as the default methods.  You can
161#' use this to set up methods to pre-process or set specific parameters for
162#' selected classes that can then \code{callNextMethod} for the actual diff.
163#' Note that while the generics include \code{...} as an argument, none of the
164#' methods do.
165#'
166#' Strings are re-encoded to UTF-8 with \code{\link{enc2utf8}} prior to
167#' comparison to avoid encoding-only differences.
168#'
169#' The text representation of `target` and `current` should each have no more
170#' than ~INT_MAX/4 lines.
171#'
172#' @section Matrices and Data Frames:
173#'
174#' While \code{diffPrint} attempts to handle the default R behavior that wraps
175#' wide tables, the results are often sub-optimal.  A better approach is to set
176#' the \code{disp.width} parameter to a large enough value such that wrapping is
177#' not necessary, and a browser-based \code{pager}.  In the future we will add
178#' the capability to specify different capture widths and wrap widths so that
179#' this is an option for terminal output (see
180#' \href{https://github.com/brodieG/diffobj/issues/109}{issue 109}).
181#'
182#' One thing to keep in mind is that \code{diffPrint} is not designed to work
183#' with very large data frames.
184#'
185#' @export
186#' @seealso \code{\link{diffObj}}, \code{\link{diffStr}},
187#'   \code{\link{diffChr}} to compare character vectors directly,
188#'   \code{\link{diffDeparse}} to compare deparsed objects, \code{\link{ses}}
#'   for a minimal and fast diff
190#' @param target the reference object
191#' @param current the object being compared to \code{target}
192#' @param mode character(1L), one of:
193#'   \itemize{
194#'     \item \dQuote{unified}: diff mode used by \code{git diff}
195#'     \item \dQuote{sidebyside}: line up the differences side by side
196#'     \item \dQuote{context}: show the target and current hunks in their
197#'       entirety; this mode takes up a lot of screen space but makes it easier
198#'       to see what the objects actually look like
199#'     \item \dQuote{auto}: default mode; pick one of the above, will favor
200#'       \dQuote{sidebyside} unless \code{getOption("width")} is less than 80,
201#'       or in \code{diffPrint} and objects are dimensioned and do not fit side
202#'       by side, or in \code{diffChr}, \code{diffDeparse}, \code{diffFile} and
203#'       output does not fit in side by side without wrapping
204#'   }
205#' @param context integer(1L) how many lines of context are shown on either side
206#'   of differences (defaults to 2).  Set to \code{-1L} to allow as many as
207#'   there are.  Set to \dQuote{auto}  to display as many as 10 lines or as few
208#'   as 1 depending on whether total screen lines fit within the number of lines
209#'   specified in \code{line.limit}.  Alternatively pass the return value of
210#'   \code{\link{auto_context}} to fine tune the parameters of the auto context
211#'   calculation.
212#' @param format character(1L), controls the diff output format, one of:
213#'   \itemize{
214#'     \item \dQuote{auto}: to select output format based on terminal
215#'       capabilities; will attempt to use one of the ANSI formats if they
216#'       appear to be supported, and if not or if you are in the Rstudio console
217#'       it will attempt to use HTML and browser output if in interactive mode.
218#'     \item \dQuote{raw}: plain text
219#'     \item \dQuote{ansi8}: color and format diffs using basic ANSI escape
220#'       sequences
221#'     \item \dQuote{ansi256}: like \dQuote{ansi8}, except using the full range
222#'       of ANSI formatting options
223#'     \item \dQuote{html}: color and format using HTML markup; the resulting
224#'       string is processed with \code{\link{enc2utf8}} when output as a full
225#'       web page (see docs for \code{html.output} under \code{\link{Style}}).
226#'   }
227#'   Defaults to \dQuote{auto}.  See \code{palette.of.styles} for details
228#'   on customization, \code{\link{style}} for full control of output format.
229#'   See `pager` parameter for more discussion of Rstudio behavior.
230#' @param brightness character, one of \dQuote{light}, \dQuote{dark},
231#'   \dQuote{neutral}, useful for adjusting color scheme to light or dark
232#'   terminals.  \dQuote{neutral} by default.  See \code{\link{PaletteOfStyles}}
233#'   for details and limitations.  Advanced: you may specify brightness as a
234#'   function of \code{format}.  For example, if you typically wish to use a
235#'   \dQuote{dark} color scheme, except for when in \dQuote{html} format when
236#'   you prefer the \dQuote{light} scheme, you may use
237#'   \code{c("dark", html="light")} as the value for this parameter.  This is
238#'   particularly useful if \code{format} is set to \dQuote{auto} or if you
239#'   want to specify a default value for this parameter via options.  Any names
240#'   you use should correspond to a \code{format}.  You must have one unnamed
241#'   value which will be used as the default for all \code{format}s that are
242#'   not explicitly specified.
243#' @param color.mode character, one of \dQuote{rgb} or \dQuote{yb}.
244#'   Defaults to \dQuote{yb}.  \dQuote{yb} stands for \dQuote{Yellow-Blue} for
245#'   color schemes that rely primarily on those colors to style diffs.
246#'   Those colors can be easily distinguished by individuals with
247#'   limited red-green color sensitivity.  See \code{\link{PaletteOfStyles}} for
248#'   details and limitations.  Also offers the same advanced usage as the
249#'   \code{brightness} parameter.
250#' @param word.diff TRUE (default) or FALSE, whether to run a secondary word
251#'   diff on the in-hunk differences.  For atomic vectors setting this to
252#'   FALSE could make the diff \emph{slower} (see the \code{unwrap.atomic}
253#'   parameter).  For other uses, particularly with \code{\link{diffChr}}
254#'   setting this to FALSE can substantially improve performance.
255#' @param pager one of \dQuote{auto} (default), \dQuote{on},
256#'   \dQuote{off}, a \code{\link{Pager}} object, or a list; controls whether and
257#'   how a pager is used to display the diff output.  If you require a
258#'   particular pager behavior you must use a \code{\link{Pager}}
259#'   object, or \dQuote{off} to turn off the pager.  All other settings will
260#'   interact with other parameters such as \code{format}, \code{style}, as well
261#'   as with your system capabilities in order to select the pager expected to
262#'   be most useful.
263#'
264#'   \dQuote{auto} and \dQuote{on} are the same, except that in non-interactive
265#'   mode \dQuote{auto} is equivalent to \dQuote{off}.  \dQuote{off} will always
266#'   send output to the console.  If \dQuote{on}, whether the output
267#'   actually gets routed to the pager depends on the pager \code{threshold}
268#'   setting (see \code{\link{Pager}}).  The default behavior is to use the
269#'   pager associated with the \code{Style} object.  The \code{Style} object is
#'   itself determined by the \code{format} or \code{style} parameters.
271#'
272#'   Depending on your system configuration different styles and corresponding
273#'   pagers will get selected, unless you specify a \code{Pager} object
274#'   directly.  On a system with a system pager that supports ANSI CSI SGR
275#'   colors, the pager will only trigger if the output is taller than one
276#'   window.  If the system pager is not known to support ANSI colors then the
277#'   output will be sent as HTML to the IDE viewer if available or to the web
278#'   browser if not.  Even though Rstudio now supports ANSI CSI SGR at the
#'   console, output is still formatted as HTML and sent to the IDE viewer.
280#'   Partly this is for continuity of behavior, but also because the default
281#'   Rstudio pager does not support ANSI CSI SGR, at least as of this writing.
282#'
283#'   If \code{pager} is a list, then the same as with \dQuote{on}, except that
284#'   the \code{Pager} object associated with the selected \code{Style} object is
285#'   re-instantiated with the union of the list elements and the existing
286#'   settings of that \code{Pager}.  The list should contain named elements that
287#'   correspond to the \code{\link{Pager}} instantiation parameters.  The names
288#'   must be specified in full as partial parameter matching will not be carried
289#'   out because the pager is re-instantiated with \code{\link{new}}.
290#'
291#'   See \code{\link{Pager}}, \code{\link{Style}}, and
292#'   \code{\link{PaletteOfStyles}} for more details and for instructions on how
293#'   to modify the default behavior.
294#' @param guides TRUE (default), FALSE, or a function that accepts at least two
295#'   arguments and requires no more than two arguments.  Guides
296#'   are additional context lines that are not strictly part of a hunk, but
297#'   provide important contextual data (e.g. column headers).  If TRUE, the
298#'   context lines are shown in addition to the normal diff output, typically
299#'   in a different color to indicate they are not part of the hunk.  If a
300#'   function, the function should accept as the first argument the object
301#'   being diffed, and the second the character representation of the object.
302#'   The function should return the indices of the elements of the
303#'   character representation that should be treated as guides.  See
304#'   \code{\link{guides}} for more details.
305#' @param trim TRUE (default), FALSE, or a function that accepts at least two
306#'   arguments and requires no more than two arguments.  Function should compute
307#'   for each line in captured output what portion of those lines should be
308#'   diffed.  By default, this is used to remove row meta data differences
309#'   (e.g. \code{[1,]}) so they alone do not show up as differences in the
310#'   diff.  See \code{\link{trim}} for more details.
311#' @param rds TRUE (default) or FALSE, if TRUE will check whether
312#'   \code{target} and/or \code{current} point to a file that can be read with
313#'   \code{\link{readRDS}} and if so, loads the R object contained in the file
314#'   and carries out the diff on the object instead of the original argument.
315#'   Currently there is no mechanism for specifying additional arguments to
316#'   \code{readRDS}
317#' @param unwrap.atomic TRUE (default) or FALSE.  Relevant primarily for
318#'   \code{diffPrint}, if TRUE, and \code{word.diff} is also TRUE, and both
#'   \code{target} and \code{current} are \emph{unnamed} one-dimensional atomics,
320#'   the vectors are unwrapped and diffed element by element, and then
321#'   re-wrapped.  Since \code{diffPrint} is fundamentally a line diff, the
322#'   re-wrapped lines are lined up in a manner that is as consistent as possible
323#'   with the unwrapped diff.  Lines that contain the location of the word
324#'   differences will be paired up.  Since the vectors may well be wrapped with
325#'   different periodicities this will result in lines that are paired up that
326#'   look like they should not be paired up, though the locations of the
#'   differences should be.  It is entirely possible that setting this parameter
328#'   to FALSE will result in a slower diff.  This happens if two vectors are
329#'   actually fairly similar, but their line representations are not.  For
330#'   example, in comparing \code{1:100} to \code{c(100, 1:99)}, there is really
331#'   only one difference at the \dQuote{word} level, but every screen line is
332#'   different.  \code{diffChr} will also do the unwrapping if it is given a
333#'   character vector that contains output that looks like the atomic vectors
334#'   described above.  This is a bug, but as the functionality could be useful
335#'   when diffing e.g. \code{capture.output} data, we now declare it a feature.
336#' @param line.limit integer(2L) or integer(1L), if length 1 how many lines of
337#'   output to show, where \code{-1} means no limit.  If length 2, the first
338#'   value indicates the threshold of screen lines to begin truncating output,
339#'   and the second the number of lines to truncate to, which should be fewer
340#'   than the threshold.  Note that this parameter is implemented on a
341#'   best-efforts basis and should not be relied on to produce the exact
342#'   number of lines requested.  In particular do not expect it to work well for
#'   values small enough that the banner portion of the diff would have to
344#'   be trimmed.  If you want a specific number of lines use \code{[} or
345#'   \code{head} / \code{tail}.  One advantage of \code{line.limit} over these
346#'   other options is that you can combine it with \code{context="auto"} and
347#'   auto \code{max.level} selection (the latter for \code{diffStr}), which
348#'   allows the diff to dynamically adjust to make best use of the available
349#'   display lines.  \code{[}, \code{head}, and \code{tail} just subset the text
350#'   of the output.
351#' @param hunk.limit integer(2L) or integer (1L), how many diff hunks to show.
352#'   Behaves similarly to \code{line.limit}.  How many hunks are in a
353#'   particular diff is a function of how many differences, and also how much
354#'   \code{context} is used since context can cause two hunks to bleed into
355#'   each other and become one.
356#' @param max.diffs integer(1L), number of \emph{differences} (default 50000L)
357#'   after which we abandon the \code{O(n^2)} diff algorithm in favor of a naive
358#'   \code{O(n)} one. Set to \code{-1L} to stick to the original algorithm up to
359#'   the maximum allowed (~INT_MAX/4).
360#' @param disp.width integer(1L) number of display columns to take up; note that
361#'   in \dQuote{sidebyside} \code{mode} the effective display width is half this
362#'   number (set to 0L to use default widths which are \code{getOption("width")}
#'   for normal styles and \code{80L} for HTML styles).  Future versions of
364#'   \code{diffobj} may change this to larger values for two dimensional objects
365#'   for better diffs (see details).
#' @param ignore.white.space TRUE or FALSE, whether to ignore differences in
#'   horizontal whitespace (i.e. spaces and tabs) rather than report them as
#'   differences (defaults to TRUE).
#' @param convert.hz.white.space TRUE or FALSE, whether to modify input strings
370#'   that contain tabs and carriage returns in such a way that they display as
371#'   they would \bold{with} those characters, but without using those
372#'   characters (defaults to TRUE).  The conversion assumes that tab stops are
373#'   spaced evenly eight characters apart on the terminal.  If this is not the
374#'   case you may specify the tab stops explicitly with \code{tab.stops}.
375#' @param tab.stops integer, what tab stops to use when converting hard tabs to
376#'   spaces.  If not integer will be coerced to integer (defaults to 8L).  You
377#'   may specify more than one tab stop.  If display width exceeds that
378#'   addressable by your tab stops the last tab stop will be repeated.
379#' @param align numeric(1L) between 0 and 1, proportion of
380#'   words in a line of \code{target} that must be matched in a line of
381#'   \code{current} in the same hunk for those lines to be paired up when
382#'   displayed (defaults to 0.25), or an \code{\link{AlignThreshold}} object.
383#'   Set to \code{1} to turn off alignment which will cause all lines in a hunk
384#'   from \code{target} to show up first, followed by all lines from
385#'   \code{current}.  Note that in order to be aligned lines must meet the
386#'   threshold and have at least 3 matching alphanumeric characters (see
387#'   \code{\link{AlignThreshold}} for details).
388#' @param style \dQuote{auto}, a \code{\link{Style}} object, or a list.
#'   \dQuote{auto} by default.  If a \code{Style} object, will override
390#'   the \code{format}, \code{brightness}, and \code{color.mode} parameters.
391#'   The \code{Style} object provides full control of diff output styling.
392#'   If a list, then the same as \dQuote{auto}, except that if the auto-selected
393#'   \code{Style} requires instantiation (see \code{\link{PaletteOfStyles}}),
394#'   then the list contents will be used as arguments when instantiating the
395#'   style object.  See \code{\link{Style}} for more details, in particular the
396#'   examples.
397#' @param palette.of.styles \code{\link{PaletteOfStyles}} object; advanced
398#'   usage, contains all the \code{\link{Style}} objects or
399#'   \dQuote{classRepresentation} objects extending \code{\link{Style}} that are
400#'   selected by specifying the \code{format}, \code{brightness}, and
401#'   \code{color.mode} parameters.  See \code{\link{PaletteOfStyles}} for more
402#'   details.
403#' @param frame an environment to use as the evaluation frame for the
#'   \code{print/show/str} calls, and for \code{diffObj}, the evaluation frame
405#'   for the \code{diffPrint} / \code{diffStr} calls.  Defaults to the return
406#'   value of \code{\link{par_frame}}.
407#' @param interactive TRUE or FALSE whether the function is being run in
408#'   interactive mode, defaults to the return value of
409#'   \code{\link{interactive}}.  If in interactive mode, pager will be used if
410#'   \code{pager} is \dQuote{auto}, and if ANSI styles are not supported and
#'   \code{style} is \dQuote{auto}, output will be sent to viewer/browser as
412#'   HTML.
413#' @param term.colors integer(1L) how many ANSI colors are supported by the
414#'   terminal.  This variable is provided for when
415#'   \code{\link[=num_colors]{crayon::num_colors}} does not properly detect how
416#'   many ANSI colors are supported by your terminal. Defaults to return value
417#'   of \code{\link[=num_colors]{crayon::num_colors}} and should be 8 or 256 to
418#'   allow ANSI colors, or any other number to disallow them.  This only
419#'   impacts output format selection when \code{style} and \code{format} are
420#'   both set to \dQuote{auto}.
421#' @param tar.banner character(1L), language, or NULL, used to generate the
422#'   text to display ahead of the diff section representing the target output.
423#'   If NULL will use the deparsed \code{target} expression, if language, will
424#'   use the language as it would the \code{target} expression, if
425#'   character(1L), will use the string with no modifications.  The language
426#'   mode is provided because \code{diffStr} modifies the expression prior to
427#'   display (e.g. by wrapping it in a call to \code{str}).  Note that it is
428#'   possible in some cases that the substituted value of \code{target} actually
429#'   is character(1L), but if you provide a character(1L) value here it will be
430#'   assumed you intend to use that value literally.
431#' @param cur.banner character(1L) like \code{tar.banner}, but for
432#'   \code{current}
433#' @param strip.sgr TRUE, FALSE, or NULL (default), whether to strip ANSI CSI
434#'   SGR sequences prior to comparison and for display of diff.  If NULL,
435#'   resolves to TRUE if `style` resolves to an ANSI formatted diff, and
436#'   FALSE otherwise.  The default behavior is to avoid confusing diffs where
437#'   the original SGR and the SGR added by the diff are mixed together.
438#' @param sgr.supported TRUE, FALSE, or NULL (default), whether to assume the
439#'   standard output device supports ANSI CSI SGR sequences.  If TRUE, strings
440#'   will be manipulated accounting for the SGR sequences.  If NULL,
441#'   resolves to TRUE if `style` resolves to an ANSI formatted diff, and
442#'   to `crayon::has_color()` otherwise.  This only controls how the strings are
443#'   manipulated, not whether SGR is added to format the diff, which is
444#'   controlled by the `style` parameter.  This parameter is exposed for the
445#'   rare cases where you might wish to control string manipulation behavior
446#'   directly.
447#' @param extra list additional arguments to pass on to the functions used to
448#'   create text representation of the objects to diff (e.g. \code{print},
449#'   \code{str}, etc.)
450#' @param ... unused, for compatibility of methods with generics
451#' @return a \code{Diff} object; this object has a \code{show}
452#'   method that will display the diff to screen or pager, as well as
453#'   \code{summary}, \code{any}, and \code{as.character} methods.
454#'   If you store the return value instead of displaying it to screen, and
455#'   display it later, it is possible for the display to be thrown off if
456#'   there are environment changes (e.g. display width changes) in between
457#'   the time you compute the diff and the time you display it.
458#' @rdname diffPrint
459#' @name diffPrint
460#' @export
461#' @examples
462#' ## `pager="off"` for CRAN compliance; you may omit in normal use
463#' diffPrint(letters, letters[-5], pager="off")
464
setGeneric(
  # dispatch is on `target` and `current`; `...` is accepted by the generic
  # only
  name="diffPrint",
  def=function(target, current, ...) standardGeneric("diffPrint")
)
468
469#' @rdname diffPrint
470
setMethod(
  # default method: print/show capture wrapped by the shared method factory
  f="diffPrint",
  signature=signature(target="ANY", current="ANY"),
  definition=make_diff_fun(capt_print)
)
472
473#' Diff Object Structures
474#'
475#' Compares the \code{str} output of \code{target} and \code{current}.  If
476#' the \code{max.level} parameter to \code{str} is left unspecified, will
477#' attempt to find the largest \code{max.level} that fits within
478#' \code{line.limit} and shows at least one difference.
479#'
480#' Due to the seemingly inconsistent nature of \code{max.level} when used with
481#' objects with nested attributes, and also due to the relative slowness of
482#' \code{str}, this function simulates the effect of \code{max.level} by hiding
483#' nested lines instead of repeatedly calling \code{str} with varying values of
484#' \code{max.level}.
485#'
486#' @inheritParams diffPrint
487#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
488#'   \code{\link{diffObj}}, \code{\link{diffStr}},
489#'   \code{\link{diffChr}} to compare character vectors directly,
490#'   \code{\link{diffDeparse}} to compare deparsed objects,
491#'   \code{\link{ses}} for a minimal and fast diff
492#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
493#' @rdname diffStr
494#' @export
495#' @examples
496#' ## `pager="off"` for CRAN compliance; you may omit in normal use
497#' with(mtcars, diffStr(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))
498
setGeneric(
  # dispatch is on `target` and `current`; `...` is accepted by the generic
  # only
  name="diffStr",
  def=function(target, current, ...) standardGeneric("diffStr")
)
500
501#' @rdname diffStr
502
setMethod(
  # default method: `str` capture wrapped by the shared method factory
  f="diffStr",
  signature=signature(target="ANY", current="ANY"),
  definition=make_diff_fun(capt_str)
)
504
505#' Diff Character Vectors Element By Element
506#'
507#' Will perform the diff on the actual string values of the character vectors
508#' instead of capturing the printed screen output. Each vector element is
509#' treated as a line of text.  NA elements are treated as the string
510#' \dQuote{NA}.  Non character inputs are coerced to character and attributes
511#' are dropped with \code{\link{c}}.
512#'
513#' @inheritParams diffPrint
514#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
515#'   \code{\link{diffObj}}, \code{\link{diffStr}},
516#'   \code{\link{diffDeparse}} to compare deparsed objects,
517#'   \code{\link{ses}} for a minimal and fast diff
518#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
519#' @export
520#' @rdname diffChr
521#' @examples
522#' ## `pager="off"` for CRAN compliance; you may omit in normal use
523#' diffChr(LETTERS[1:5], LETTERS[2:6], pager="off")
524
setGeneric(
  # dispatch is on `target` and `current`; `...` is accepted by the generic
  # only
  name="diffChr",
  def=function(target, current, ...) standardGeneric("diffChr")
)
526
527#' @rdname diffChr
528
setMethod(
  # default method: character capture wrapped by the shared method factory
  f="diffChr",
  signature=signature(target="ANY", current="ANY"),
  definition=make_diff_fun(capt_chr)
)
530
531#' Diff Deparsed Objects
532#'
533#' Perform diff on the character vectors produced by \code{\link{deparse}}ing
534#' the objects.  Each element counts as a line.  If an element contains newlines
#' it will be split into separate elements at the newlines.
536#'
537#' @export
538#' @inheritParams diffPrint
539#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
540#'   \code{\link{diffObj}}, \code{\link{diffStr}},
541#'   \code{\link{diffChr}} to compare character vectors directly,
542#'   \code{\link{ses}} for a minimal and fast diff
543#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
544#' @export
545#' @rdname diffDeparse
546#' @examples
547#' ## `pager="off"` for CRAN compliance; you may omit in normal use
548#' diffDeparse(matrix(1:9, 3), 1:9, pager="off")
549
setGeneric(
  # dispatch is on `target` and `current`; `...` is accepted by the generic
  # only
  name="diffDeparse",
  def=function(target, current, ...) standardGeneric("diffDeparse")
)
553#' @rdname diffDeparse
554
setMethod(
  # default method: deparse capture wrapped by the shared method factory
  f="diffDeparse",
  signature=signature(target="ANY", current="ANY"),
  definition=make_diff_fun(capt_deparse)
)
556
557#' Diff Files
558#'
559#' Reads text files with \code{\link{readLines}} and performs a diff on the
560#' resulting character vectors.
561#'
562#' @export
563#' @param target character(1L) or file connection with read capability; if
564#'   character should point to a text file
565#' @param current like \code{target}
566#' @inheritParams diffPrint
567#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
568#'   \code{\link{diffObj}}, \code{\link{diffStr}},
569#'   \code{\link{diffChr}} to compare character vectors directly,
570#'   \code{\link{ses}} for a minimal and fast diff
571#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
572#' @export
573#' @rdname diffFile
574#' @examples
575#' \dontrun{
576#' url.base <- "https://raw.githubusercontent.com/wch/r-source"
577#' f1 <- file.path(url.base, "29f013d1570e1df5dc047fb7ee304ff57c99ea68/README")
578#' f2 <- file.path(url.base, "daf0b5f6c728bd3dbcd0a3c976a7be9beee731d9/README")
579#' diffFile(f1, f2)
580#' }
581
setGeneric(
  name="diffFile",
  # Generic formals are `target`, `current` and `...`; any further arguments
  # travel through `...` to the method
  def=function(target, current, ...) standardGeneric("diffFile")
)
585#' @rdname diffFile
586
setMethod(
  f="diffFile", signature=c("ANY", "ANY"),
  # Method comes from the shared factory, with `capt_file` as its `capt_fun`
  definition=make_diff_fun(capt_file)
)
588
589#' Diff CSV Files
590#'
591#' Reads CSV files with \code{\link{read.csv}} and passes the resulting data
#' frames onto \code{\link{diffPrint}}.  \code{extra} values are passed as
#' arguments to both \code{read.csv} and \code{print}.  To the
594#' extent you wish to use different \code{extra} arguments for each of those
595#' functions you will need to \code{read.csv} the files and pass them to
596#' \code{diffPrint} yourself.
597#'
598#' @export
599#' @param target character(1L) or file connection with read capability;
600#'   if character should point to a CSV file
601#' @param current like \code{target}
602#' @inheritParams diffPrint
603#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
604#'   \code{\link{diffObj}}, \code{\link{diffStr}},
605#'   \code{\link{diffChr}} to compare character vectors directly,
606#'   \code{\link{ses}} for a minimal and fast diff
607#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
608#' @export
609#' @rdname diffCsv
610#' @examples
611#' iris.2 <- iris
612#' iris.2$Sepal.Length[5] <- 99
613#' f1 <- tempfile()
614#' f2 <- tempfile()
615#' write.csv(iris, f1, row.names=FALSE)
616#' write.csv(iris.2, f2, row.names=FALSE)
617#' ## `pager="off"` for CRAN compliance; you may omit in normal use
618#' diffCsv(f1, f2, pager="off")
619#' unlink(c(f1, f2))
620
setGeneric(
  name="diffCsv",
  # Generic formals are `target`, `current` and `...`; any further arguments
  # travel through `...` to the method
  def=function(target, current, ...) standardGeneric("diffCsv")
)
624#' @rdname diffCsv
625
setMethod(
  f="diffCsv", signature=c("ANY", "ANY"),
  # Method comes from the shared factory, with `capt_csv` as its `capt_fun`
  definition=make_diff_fun(capt_csv)
)
627
628#' Diff Objects
629#'
630#' Compare either the \code{print}ed or \code{str} screen representation of
631#' R objects depending on which is estimated to produce the most useful
632#' diff.  The selection process tries to minimize screen lines while maximizing
633#' differences shown subject to display constraints.  The decision algorithm is
634#' likely to evolve over time, so do not rely on this function making
635#' a particular selection under specific circumstances.  Instead, use
636#' \code{\link{diffPrint}} or \code{\link{diffStr}} if you require one or the
637#' other output.
638#'
639#' @inheritParams diffPrint
640#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} methods,
641#'   \code{\link{diffStr}},
#'   \code{\link{diffChr}} to compare character vectors directly,
643#'   \code{\link{diffDeparse}} to compare deparsed objects,
644#'   \code{\link{ses}} for a minimal and fast diff
645#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
646#' @export
647#' @examples
648#' ## `pager="off"` for CRAN compliance; you may omit in normal use
649#' diffObj(letters, c(letters[1:10], LETTERS[11:26]), pager="off")
650#' with(mtcars, diffObj(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))
651
setGeneric(
  name="diffObj",
  # Generic formals are `target`, `current` and `...`; any further arguments
  # travel through `...` to the method
  def=function(target, current, ...) standardGeneric("diffObj")
)
653
# `diffObj` implementation: run both `diffPrint` and `diffStr` on the same
# inputs and return whichever `Diff` object the heuristic below prefers.
#
# The factory is called only to obtain a function with the shared `diff*`
# formals; the body it generates for `identity` is replaced immediately.

diff_obj <- make_diff_fun(identity)
body(diff_obj) <- quote({
  if(length(extra)) {
    stop("Argument `extra` must be empty in `diffObj`.")
  }
  # Snapshot the contents of the function environment before any local
  # variable is assigned, so the list holds only what was passed in (locals
  # created later would otherwise be forwarded as arguments too)

  arg.list <- as.list(environment())
  call.info <- extract_call(sys.calls(), frame)
  err <- make_err_fun(call.info$call)

  # Banners left as NULL default to the quoted `tar` / `cur` entries returned
  # by `extract_call`

  if(is.null(arg.list$tar.banner))
    arg.list$tar.banner <- call("quote", call.info$tar)
  if(is.null(arg.list$cur.banner))
    arg.list$cur.banner <- call("quote", call.info$cur)

  # Both delegate calls share the same argument list; the `diffStr` one has its
  # `extra` replaced so that `max.level` is "auto"

  build_call <- function(fun) as.call(c(list(fun), arg.list))
  p.call <- build_call(quote(diffobj::diffPrint))
  s.call <- build_call(quote(diffobj::diffStr))
  s.call[["extra"]] <- list(max.level="auto")

  # Evaluate in `frame`; failures are captured here and re-signalled through
  # `err` below (the `diffStr` failure is checked first)

  p.res <- try(eval(p.call, frame), silent=TRUE)
  s.res <- try(eval(s.call, frame), silent=TRUE)

  if(inherits(s.res, "try-error"))
    err(
      "Error in calling `diffStr`: ",
      conditionMessage(attr(s.res, "condition"))
    )
  if(inherits(p.res, "try-error"))
    err(
      "Error in calling `diffPrint`: ",
      conditionMessage(attr(p.res, "condition"))
    )

  # Inputs to the selection heuristic, computed for each candidate: hunk
  # count, diff length in lines, and line coverage

  hunks.p <- count_diff_hunks(p.res@diffs)
  hunks.s <- count_diff_hunks(s.res@diffs)
  len.p <- diff_line_len(
    p.res@diffs, p.res@etc,
    tar.capt=p.res@tar.dat$raw, cur.capt=p.res@cur.dat$raw
  )
  len.s <- diff_line_len(
    s.res@diffs, s.res@etc,
    tar.capt=s.res@tar.dat$raw, cur.capt=s.res@cur.dat$raw
  )
  cover.p <- lineCoverage(p.res)
  cover.s <- lineCoverage(s.res)

  # Only one of the two reports differences: return that one

  if(!hunks.s && hunks.p) return(res = p.res)
  if(!hunks.p && hunks.s) return(res = s.res)

  # Only one of the two fits in full: return that one.
  # NOTE(review): presumably `trim.dat$lines[[1L]]` is zero when nothing was
  # trimmed from the display -- confirm against the `Diff` class

  if(!s.res@trim.dat$lines[[1L]] && p.res@trim.dat$lines[[1L]])
    return(s.res)
  if(s.res@trim.dat$lines[[1L]] && !p.res@trim.dat$lines[[1L]])
    return(p.res)

  # Print output that takes at most half the console lines always wins

  if(len.p <= console_lines() / 2) return(p.res)

  # Otherwise score each candidate as hunks per diff line, weighted by line
  # coverage; ties go to the print version

  s.score <- hunks.s / len.s * cover.s
  p.score <- hunks.p / len.p * cover.p
  if(p.score >= s.score) p.res else s.res
})
740#' @export
setMethod(
  f="diffObj", signature=c("ANY", "ANY"),
  # Uses the factory-derived function whose body was replaced above
  definition=diff_obj
)
742