1# Copyright (C) 2021 Brodie Gaslam 2# 3# This file is part of "diffobj - Diffs for R Objects" 4# 5# This program is free software: you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation, either version 2 of the License, or 8# (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license. 16 17#' Diffs for R Objects 18#' 19#' Generate a colorized diff of two R objects for an intuitive visualization of 20#' their differences. See `vignette(package="diffobj", "diffobj")` for details. 21#' 22#' @import crayon 23#' @import methods 24#' @importFrom utils capture.output file_test packageVersion read.csv 25#' @importFrom stats ave frequency is.ts setNames 26#' @importFrom grDevices rgb 27#' @name diffobj-package 28#' @docType package 29 30NULL 31 32# Because all these functions are so similar, we have constructed them with a 33# function factory. This allows us to easily maintain consistent formals during 34# initial development process when they have not been set in stone yet. 
# Build the shared implementation behind the default `diff*` methods.
#
# `capt_fun` is the capture step specific to one flavor of diff.  The closure
# returned here checks its arguments with `check_args`, computes the gutter and
# display widths, and then invokes `capt_fun(target, current, etc=, err=, extra)`
# and returns its result.  Every closure produced here has the same formals.

make_diff_fun <- function(capt_fun) {
  # nocov start
  function(
    target, current,
    mode=gdo("mode"),
    context=gdo("context"),
    format=gdo("format"),
    brightness=gdo("brightness"),
    color.mode=gdo("color.mode"),
    word.diff=gdo("word.diff"),
    pager=gdo("pager"),
    guides=gdo("guides"),
    trim=gdo("trim"),
    rds=gdo("rds"),
    unwrap.atomic=gdo("unwrap.atomic"),
    max.diffs=gdo("max.diffs"),
    disp.width=gdo("disp.width"),
    ignore.white.space=gdo("ignore.white.space"),
    convert.hz.white.space=gdo("convert.hz.white.space"),
    tab.stops=gdo("tab.stops"),
    line.limit=gdo("line.limit"),
    hunk.limit=gdo("hunk.limit"),
    align=gdo("align"),
    style=gdo("style"),
    palette.of.styles=gdo("palette"),
    frame=par_frame(),
    interactive=gdo("interactive"),
    term.colors=gdo("term.colors"),
    tar.banner=NULL,
    cur.banner=NULL,
    strip.sgr=gdo("strip.sgr"),
    sgr.supported=gdo("sgr.supported"),
    extra=list()
  ) {
  # nocov end
    # Touch `frame` first so that its default, `par_frame()`, is evaluated from
    # this context; it must stay a bare symbol evaluation so that no extra call
    # sits on the stack while the default is computed.
    frame
    call.dat <- extract_call(sys.calls(), frame)

    # Touch `target` / `current` now so that a missing one errors here rather
    # than somewhere further down.
    target
    current

    # Validate the arguments and resolve all the auto-selection ones

    etc.proc <- check_args(
      call=call.dat$call,
      tar.exp=call.dat$tar,
      cur.exp=call.dat$cur,
      mode=mode,
      context=context,
      line.limit=line.limit,
      format=format,
      brightness=brightness,
      color.mode=color.mode,
      pager=pager,
      ignore.white.space=ignore.white.space,
      max.diffs=max.diffs,
      align=align,
      disp.width=disp.width,
      hunk.limit=hunk.limit,
      convert.hz.white.space=convert.hz.white.space,
      tab.stops=tab.stops,
      style=style,
      palette.of.styles=palette.of.styles,
      frame=frame,
      tar.banner=tar.banner,
      cur.banner=cur.banner,
      guides=guides,
      rds=rds,
      trim=trim,
      word.diff=word.diff,
      unwrap.atomic=unwrap.atomic,
      extra=extra,
      interactive=interactive,
      term.colors=term.colors,
      strip.sgr=strip.sgr,
      sgr.supported=sgr.supported,
      call.match=match.call()
    )
    # In rds mode, give `get_rds` a chance to swap either input for the object
    # stored in the RDS file it references

    if(rds) {
      target <- get_rds(target)
      current <- get_rds(current)
    }
    # Disabled: force crayon to whatever ansi status we chose; note this would
    # have to happen after touching vars in case someone passes
    # `options(crayon.enabled=...)` as one of the arguments
    #
    # old.crayon.opt <- options(
    #   crayon.enabled=
    #     is(etc.proc@style, "StyleAnsi") ||
    #     (!is(etc.proc@style, "StyleHtml") && etc.proc@sgr.supported)
    # )
    # on.exit(options(old.crayon.opt), add=TRUE)

    err <- make_err_fun(sys.call())

    # Gutter values come first since the capture widths depend on them.  Per
    # the original notes: if not a base text type style, gutter and column
    # padding are assumed to be zero even though that may not always be correct

    etc.proc@gutter <- gutter_dat(etc.proc)

    pad.col.width <-
      nchar2(etc.proc@style@text@pad.col, sgr.supported=etc.proc@sgr.supported)
    gutter.width <- etc.proc@gutter@width
    min.line.width <- .min.width + gutter.width
    half.disp.width <- as.integer((etc.proc@disp.width - pad.col.width) / 2)

    etc.proc@line.width <- max(etc.proc@disp.width, min.line.width)
    etc.proc@text.width <- etc.proc@line.width - gutter.width
    etc.proc@line.width.half <- max(half.disp.width, min.line.width)
    etc.proc@text.width.half <- etc.proc@line.width.half - gutter.width

    # If in side by side mode already then we know we want half-width, and if
    # width is less than 80 we know we want unified

    if(etc.proc@mode == "auto" && etc.proc@disp.width < 80L) {
      etc.proc@mode <- "unified"
    }
    if(etc.proc@mode == "sidebyside") {
      etc.proc <- sideBySide(etc.proc)
    }
    # Capture and diff

    res <- capt_fun(target, current, etc=etc.proc, err=err, extra)
    res
  }
}

#' Diff \code{print}ed Objects
#'
#' Runs the diff between the \code{print} or \code{show} output produced by
#' \code{target} and
\code{current}. Given the extensive parameter list, this 144#' documentation page is intended as a reference for all the \code{diff*} 145#' methods. For a high level introduction see \code{vignette("diffobj")}. 146#' 147#' Almost all aspects of how the diffs are computed and displayed are 148#' controllable through the \code{diff*} methods parameters. This results in a 149#' lengthy parameter list, but in practice you should rarely need to adjust 150#' anything past the \code{color.mode} parameter. Default values are specified 151#' as options so that users may configure diffs in a persistent manner. 152#' \code{\link{gdo}} is a shorthand function to access \code{diffobj} options. 153#' 154#' Parameter order after \code{color.mode} is not guaranteed. Future versions 155#' of \code{diffobj} may add parameters and re-order existing parameters past 156#' \code{color.mode}. 157#' 158#' This and other \code{diff*} functions are S4 generics that dispatch on the 159#' \code{target} and \code{current} parameters. Methods with signature 160#' \code{c("ANY", "ANY")} are defined and act as the default methods. You can 161#' use this to set up methods to pre-process or set specific parameters for 162#' selected classes that can then \code{callNextMethod} for the actual diff. 163#' Note that while the generics include \code{...} as an argument, none of the 164#' methods do. 165#' 166#' Strings are re-encoded to UTF-8 with \code{\link{enc2utf8}} prior to 167#' comparison to avoid encoding-only differences. 168#' 169#' The text representation of `target` and `current` should each have no more 170#' than ~INT_MAX/4 lines. 171#' 172#' @section Matrices and Data Frames: 173#' 174#' While \code{diffPrint} attempts to handle the default R behavior that wraps 175#' wide tables, the results are often sub-optimal. A better approach is to set 176#' the \code{disp.width} parameter to a large enough value such that wrapping is 177#' not necessary, and a browser-based \code{pager}. 
#' In the future we will add
#' the capability to specify different capture widths and wrap widths so that
#' this is an option for terminal output (see
#' \href{https://github.com/brodieG/diffobj/issues/109}{issue 109}).
#'
#' One thing to keep in mind is that \code{diffPrint} is not designed to work
#' with very large data frames.
#'
#' @export
#' @seealso \code{\link{diffObj}}, \code{\link{diffStr}},
#'   \code{\link{diffChr}} to compare character vectors directly,
#'   \code{\link{diffDeparse}} to compare deparsed objects, \code{\link{ses}}
#'   for a minimal and fast diff
#' @param target the reference object
#' @param current the object being compared to \code{target}
#' @param mode character(1L), one of:
#'   \itemize{
#'     \item \dQuote{unified}: diff mode used by \code{git diff}
#'     \item \dQuote{sidebyside}: line up the differences side by side
#'     \item \dQuote{context}: show the target and current hunks in their
#'       entirety; this mode takes up a lot of screen space but makes it easier
#'       to see what the objects actually look like
#'     \item \dQuote{auto}: default mode; pick one of the above, will favor
#'       \dQuote{sidebyside} unless \code{getOption("width")} is less than 80,
#'       or in \code{diffPrint} and objects are dimensioned and do not fit side
#'       by side, or in \code{diffChr}, \code{diffDeparse}, \code{diffFile} and
#'       output does not fit in side by side without wrapping
#'   }
#' @param context integer(1L) how many lines of context are shown on either side
#'   of differences (defaults to 2). Set to \code{-1L} to allow as many as
#'   there are. Set to \dQuote{auto} to display as many as 10 lines or as few
#'   as 1 depending on whether total screen lines fit within the number of lines
#'   specified in \code{line.limit}.
Alternatively pass the return value of 210#' \code{\link{auto_context}} to fine tune the parameters of the auto context 211#' calculation. 212#' @param format character(1L), controls the diff output format, one of: 213#' \itemize{ 214#' \item \dQuote{auto}: to select output format based on terminal 215#' capabilities; will attempt to use one of the ANSI formats if they 216#' appear to be supported, and if not or if you are in the Rstudio console 217#' it will attempt to use HTML and browser output if in interactive mode. 218#' \item \dQuote{raw}: plain text 219#' \item \dQuote{ansi8}: color and format diffs using basic ANSI escape 220#' sequences 221#' \item \dQuote{ansi256}: like \dQuote{ansi8}, except using the full range 222#' of ANSI formatting options 223#' \item \dQuote{html}: color and format using HTML markup; the resulting 224#' string is processed with \code{\link{enc2utf8}} when output as a full 225#' web page (see docs for \code{html.output} under \code{\link{Style}}). 226#' } 227#' Defaults to \dQuote{auto}. See \code{palette.of.styles} for details 228#' on customization, \code{\link{style}} for full control of output format. 229#' See `pager` parameter for more discussion of Rstudio behavior. 230#' @param brightness character, one of \dQuote{light}, \dQuote{dark}, 231#' \dQuote{neutral}, useful for adjusting color scheme to light or dark 232#' terminals. \dQuote{neutral} by default. See \code{\link{PaletteOfStyles}} 233#' for details and limitations. Advanced: you may specify brightness as a 234#' function of \code{format}. For example, if you typically wish to use a 235#' \dQuote{dark} color scheme, except for when in \dQuote{html} format when 236#' you prefer the \dQuote{light} scheme, you may use 237#' \code{c("dark", html="light")} as the value for this parameter. This is 238#' particularly useful if \code{format} is set to \dQuote{auto} or if you 239#' want to specify a default value for this parameter via options. 
Any names 240#' you use should correspond to a \code{format}. You must have one unnamed 241#' value which will be used as the default for all \code{format}s that are 242#' not explicitly specified. 243#' @param color.mode character, one of \dQuote{rgb} or \dQuote{yb}. 244#' Defaults to \dQuote{yb}. \dQuote{yb} stands for \dQuote{Yellow-Blue} for 245#' color schemes that rely primarily on those colors to style diffs. 246#' Those colors can be easily distinguished by individuals with 247#' limited red-green color sensitivity. See \code{\link{PaletteOfStyles}} for 248#' details and limitations. Also offers the same advanced usage as the 249#' \code{brightness} parameter. 250#' @param word.diff TRUE (default) or FALSE, whether to run a secondary word 251#' diff on the in-hunk differences. For atomic vectors setting this to 252#' FALSE could make the diff \emph{slower} (see the \code{unwrap.atomic} 253#' parameter). For other uses, particularly with \code{\link{diffChr}} 254#' setting this to FALSE can substantially improve performance. 255#' @param pager one of \dQuote{auto} (default), \dQuote{on}, 256#' \dQuote{off}, a \code{\link{Pager}} object, or a list; controls whether and 257#' how a pager is used to display the diff output. If you require a 258#' particular pager behavior you must use a \code{\link{Pager}} 259#' object, or \dQuote{off} to turn off the pager. All other settings will 260#' interact with other parameters such as \code{format}, \code{style}, as well 261#' as with your system capabilities in order to select the pager expected to 262#' be most useful. 263#' 264#' \dQuote{auto} and \dQuote{on} are the same, except that in non-interactive 265#' mode \dQuote{auto} is equivalent to \dQuote{off}. \dQuote{off} will always 266#' send output to the console. If \dQuote{on}, whether the output 267#' actually gets routed to the pager depends on the pager \code{threshold} 268#' setting (see \code{\link{Pager}}). 
#'   The default behavior is to use the
#'   pager associated with the \code{Style} object. The \code{Style} object is
#'   itself determined by the \code{format} or \code{style} parameters.
#'
#'   Depending on your system configuration different styles and corresponding
#'   pagers will get selected, unless you specify a \code{Pager} object
#'   directly. On a system with a system pager that supports ANSI CSI SGR
#'   colors, the pager will only trigger if the output is taller than one
#'   window. If the system pager is not known to support ANSI colors then the
#'   output will be sent as HTML to the IDE viewer if available or to the web
#'   browser if not. Even though Rstudio now supports ANSI CSI SGR at the
#'   console, output is still formatted as HTML and sent to the IDE viewer.
#'   Partly this is for continuity of behavior, but also because the default
#'   Rstudio pager does not support ANSI CSI SGR, at least as of this writing.
#'
#'   If \code{pager} is a list, then the same as with \dQuote{on}, except that
#'   the \code{Pager} object associated with the selected \code{Style} object is
#'   re-instantiated with the union of the list elements and the existing
#'   settings of that \code{Pager}. The list should contain named elements that
#'   correspond to the \code{\link{Pager}} instantiation parameters. The names
#'   must be specified in full as partial parameter matching will not be carried
#'   out because the pager is re-instantiated with \code{\link{new}}.
#'
#'   See \code{\link{Pager}}, \code{\link{Style}}, and
#'   \code{\link{PaletteOfStyles}} for more details and for instructions on how
#'   to modify the default behavior.
#' @param guides TRUE (default), FALSE, or a function that accepts at least two
#'   arguments and requires no more than two arguments. Guides
#'   are additional context lines that are not strictly part of a hunk, but
#'   provide important contextual data (e.g. column headers).
If TRUE, the 298#' context lines are shown in addition to the normal diff output, typically 299#' in a different color to indicate they are not part of the hunk. If a 300#' function, the function should accept as the first argument the object 301#' being diffed, and the second the character representation of the object. 302#' The function should return the indices of the elements of the 303#' character representation that should be treated as guides. See 304#' \code{\link{guides}} for more details. 305#' @param trim TRUE (default), FALSE, or a function that accepts at least two 306#' arguments and requires no more than two arguments. Function should compute 307#' for each line in captured output what portion of those lines should be 308#' diffed. By default, this is used to remove row meta data differences 309#' (e.g. \code{[1,]}) so they alone do not show up as differences in the 310#' diff. See \code{\link{trim}} for more details. 311#' @param rds TRUE (default) or FALSE, if TRUE will check whether 312#' \code{target} and/or \code{current} point to a file that can be read with 313#' \code{\link{readRDS}} and if so, loads the R object contained in the file 314#' and carries out the diff on the object instead of the original argument. 315#' Currently there is no mechanism for specifying additional arguments to 316#' \code{readRDS} 317#' @param unwrap.atomic TRUE (default) or FALSE. Relevant primarily for 318#' \code{diffPrint}, if TRUE, and \code{word.diff} is also TRUE, and both 319#' \code{target} and \code{current} are \emph{unnamed} one-dimension atomics , 320#' the vectors are unwrapped and diffed element by element, and then 321#' re-wrapped. Since \code{diffPrint} is fundamentally a line diff, the 322#' re-wrapped lines are lined up in a manner that is as consistent as possible 323#' with the unwrapped diff. Lines that contain the location of the word 324#' differences will be paired up. 
#'   Since the vectors may well be wrapped with
#'   different periodicities this will result in lines that are paired up that
#'   look like they should not be paired up, though the locations of the
#'   differences should be. It is entirely possible that setting this parameter
#'   to FALSE will result in a slower diff. This happens if two vectors are
#'   actually fairly similar, but their line representations are not. For
#'   example, in comparing \code{1:100} to \code{c(100, 1:99)}, there is really
#'   only one difference at the \dQuote{word} level, but every screen line is
#'   different. \code{diffChr} will also do the unwrapping if it is given a
#'   character vector that contains output that looks like the atomic vectors
#'   described above. This is a bug, but as the functionality could be useful
#'   when diffing e.g. \code{capture.output} data, we now declare it a feature.
#' @param line.limit integer(2L) or integer(1L), if length 1 how many lines of
#'   output to show, where \code{-1} means no limit. If length 2, the first
#'   value indicates the threshold of screen lines to begin truncating output,
#'   and the second the number of lines to truncate to, which should be fewer
#'   than the threshold. Note that this parameter is implemented on a
#'   best-efforts basis and should not be relied on to produce the exact
#'   number of lines requested. In particular do not expect it to work well
#'   for values small enough that the banner portion of the diff would have to
#'   be trimmed. If you want a specific number of lines use \code{[} or
#'   \code{head} / \code{tail}. One advantage of \code{line.limit} over these
#'   other options is that you can combine it with \code{context="auto"} and
#'   auto \code{max.level} selection (the latter for \code{diffStr}), which
#'   allows the diff to dynamically adjust to make best use of the available
#'   display lines.
#'   \code{[}, \code{head}, and \code{tail} just subset the text
#'   of the output.
#' @param hunk.limit integer(2L) or integer(1L), how many diff hunks to show.
#'   Behaves similarly to \code{line.limit}. How many hunks are in a
#'   particular diff is a function of how many differences, and also how much
#'   \code{context} is used since context can cause two hunks to bleed into
#'   each other and become one.
#' @param max.diffs integer(1L), number of \emph{differences} (default 50000L)
#'   after which we abandon the \code{O(n^2)} diff algorithm in favor of a naive
#'   \code{O(n)} one. Set to \code{-1L} to stick to the original algorithm up to
#'   the maximum allowed (~INT_MAX/4).
#' @param disp.width integer(1L) number of display columns to take up; note that
#'   in \dQuote{sidebyside} \code{mode} the effective display width is half this
#'   number (set to 0L to use default widths which are \code{getOption("width")}
#'   for normal styles and \code{80L} for HTML styles). Future versions of
#'   \code{diffobj} may change this to larger values for two dimensional objects
#'   for better diffs (see details).
#' @param ignore.white.space TRUE or FALSE, whether to consider differences in
#'   horizontal whitespace (i.e. spaces and tabs) as differences (defaults to
#'   TRUE).
#' @param convert.hz.white.space TRUE or FALSE, whether to modify input strings
#'   that contain tabs and carriage returns in such a way that they display as
#'   they would \bold{with} those characters, but without using those
#'   characters (defaults to TRUE). The conversion assumes that tab stops are
#'   spaced evenly eight characters apart on the terminal. If this is not the
#'   case you may specify the tab stops explicitly with \code{tab.stops}.
#' @param tab.stops integer, what tab stops to use when converting hard tabs to
#'   spaces. If not integer will be coerced to integer (defaults to 8L). You
#'   may specify more than one tab stop.
#'   If display width exceeds that
#'   addressable by your tab stops the last tab stop will be repeated.
#' @param align numeric(1L) between 0 and 1, proportion of
#'   words in a line of \code{target} that must be matched in a line of
#'   \code{current} in the same hunk for those lines to be paired up when
#'   displayed (defaults to 0.25), or an \code{\link{AlignThreshold}} object.
#'   Set to \code{1} to turn off alignment which will cause all lines in a hunk
#'   from \code{target} to show up first, followed by all lines from
#'   \code{current}. Note that in order to be aligned lines must meet the
#'   threshold and have at least 3 matching alphanumeric characters (see
#'   \code{\link{AlignThreshold}} for details).
#' @param style \dQuote{auto}, a \code{\link{Style}} object, or a list.
#'   \dQuote{auto} by default. If a \code{Style} object, will override
#'   the \code{format}, \code{brightness}, and \code{color.mode} parameters.
#'   The \code{Style} object provides full control of diff output styling.
#'   If a list, then the same as \dQuote{auto}, except that if the auto-selected
#'   \code{Style} requires instantiation (see \code{\link{PaletteOfStyles}}),
#'   then the list contents will be used as arguments when instantiating the
#'   style object. See \code{\link{Style}} for more details, in particular the
#'   examples.
#' @param palette.of.styles \code{\link{PaletteOfStyles}} object; advanced
#'   usage, contains all the \code{\link{Style}} objects or
#'   \dQuote{classRepresentation} objects extending \code{\link{Style}} that are
#'   selected by specifying the \code{format}, \code{brightness}, and
#'   \code{color.mode} parameters. See \code{\link{PaletteOfStyles}} for more
#'   details.
#' @param frame an environment to use as the evaluation frame for the
#'   \code{print/show/str} calls, and for \code{diffObj}, the evaluation frame
#'   for the \code{diffPrint} / \code{diffStr} calls.
#'   Defaults to the return
#'   value of \code{\link{par_frame}}.
#' @param interactive TRUE or FALSE whether the function is being run in
#'   interactive mode, defaults to the return value of
#'   \code{\link{interactive}}. If in interactive mode, pager will be used if
#'   \code{pager} is \dQuote{auto}, and if ANSI styles are not supported and
#'   \code{style} is \dQuote{auto}, output will be sent to viewer/browser as
#'   HTML.
#' @param term.colors integer(1L) how many ANSI colors are supported by the
#'   terminal. This variable is provided for when
#'   \code{\link[=num_colors]{crayon::num_colors}} does not properly detect how
#'   many ANSI colors are supported by your terminal. Defaults to return value
#'   of \code{\link[=num_colors]{crayon::num_colors}} and should be 8 or 256 to
#'   allow ANSI colors, or any other number to disallow them. This only
#'   impacts output format selection when \code{style} and \code{format} are
#'   both set to \dQuote{auto}.
#' @param tar.banner character(1L), language, or NULL, used to generate the
#'   text to display ahead of the diff section representing the target output.
#'   If NULL will use the deparsed \code{target} expression, if language, will
#'   use the language as it would the \code{target} expression, if
#'   character(1L), will use the string with no modifications. The language
#'   mode is provided because \code{diffStr} modifies the expression prior to
#'   display (e.g. by wrapping it in a call to \code{str}). Note that it is
#'   possible in some cases that the substituted value of \code{target} actually
#'   is character(1L), but if you provide a character(1L) value here it will be
#'   assumed you intend to use that value literally.
#' @param cur.banner character(1L) like \code{tar.banner}, but for
#'   \code{current}
#' @param strip.sgr TRUE, FALSE, or NULL (default), whether to strip ANSI CSI
#'   SGR sequences prior to comparison and for display of diff.
#'   If NULL,
#'   resolves to TRUE if `style` resolves to an ANSI formatted diff, and
#'   FALSE otherwise. The default behavior is to avoid confusing diffs where
#'   the original SGR and the SGR added by the diff are mixed together.
#' @param sgr.supported TRUE, FALSE, or NULL (default), whether to assume the
#'   standard output device supports ANSI CSI SGR sequences. If TRUE, strings
#'   will be manipulated accounting for the SGR sequences. If NULL,
#'   resolves to TRUE if `style` resolves to an ANSI formatted diff, and
#'   to `crayon::has_color()` otherwise. This only controls how the strings are
#'   manipulated, not whether SGR is added to format the diff, which is
#'   controlled by the `style` parameter. This parameter is exposed for the
#'   rare cases where you might wish to control string manipulation behavior
#'   directly.
#' @param extra list additional arguments to pass on to the functions used to
#'   create text representation of the objects to diff (e.g. \code{print},
#'   \code{str}, etc.)
#' @param ... unused, for compatibility of methods with generics
#' @return a \code{Diff} object; this object has a \code{show}
#'   method that will display the diff to screen or pager, as well as
#'   \code{summary}, \code{any}, and \code{as.character} methods.
#'   If you store the return value instead of displaying it to screen, and
#'   display it later, it is possible for the display to be thrown off if
#'   there are environment changes (e.g. display width changes) in between
#'   the time you compute the diff and the time you display it.
#' @rdname diffPrint
#' @name diffPrint
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffPrint(letters, letters[-5], pager="off")

setGeneric(
  name="diffPrint",
  def=function(target, current, ...) standardGeneric("diffPrint")
)

#' @rdname diffPrint

setMethod(
  f="diffPrint",
  signature=c("ANY", "ANY"),
  definition=make_diff_fun(capt_print)
)

#' Diff Object Structures
#'
#' Compares the \code{str} output of \code{target} and \code{current}. If
#' the \code{max.level} parameter to \code{str} is left unspecified, will
#' attempt to find the largest \code{max.level} that fits within
#' \code{line.limit} and shows at least one difference.
#'
#' Due to the seemingly inconsistent nature of \code{max.level} when used with
#' objects with nested attributes, and also due to the relative slowness of
#' \code{str}, this function simulates the effect of \code{max.level} by hiding
#' nested lines instead of repeatedly calling \code{str} with varying values of
#' \code{max.level}.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#'   \code{\link{diffObj}}, \code{\link{diffStr}},
#'   \code{\link{diffChr}} to compare character vectors directly,
#'   \code{\link{diffDeparse}} to compare deparsed objects,
#'   \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @rdname diffStr
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' with(mtcars, diffStr(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))

setGeneric(
  name="diffStr",
  def=function(target, current, ...) standardGeneric("diffStr")
)

#' @rdname diffStr

setMethod(
  f="diffStr",
  signature=c("ANY", "ANY"),
  definition=make_diff_fun(capt_str)
)

#' Diff Character Vectors Element By Element
#'
#' Will perform the diff on the actual string values of the character vectors
#' instead of capturing the printed screen output. Each vector element is
#' treated as a line of text. NA elements are treated as the string
#' \dQuote{NA}.
#' Non character inputs are coerced to character and attributes
#' are dropped with \code{\link{c}}.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#'   \code{\link{diffObj}}, \code{\link{diffStr}},
#'   \code{\link{diffDeparse}} to compare deparsed objects,
#'   \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffChr
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffChr(LETTERS[1:5], LETTERS[2:6], pager="off")

setGeneric(
  name="diffChr",
  def=function(target, current, ...) standardGeneric("diffChr")
)

#' @rdname diffChr

setMethod(
  f="diffChr",
  signature=c("ANY", "ANY"),
  definition=make_diff_fun(capt_chr)
)

#' Diff Deparsed Objects
#'
#' Perform diff on the character vectors produced by \code{\link{deparse}}ing
#' the objects. Each element counts as a line. If an element contains newlines
#' it will be split into separate elements at the newlines.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#'   \code{\link{diffObj}}, \code{\link{diffStr}},
#'   \code{\link{diffChr}} to compare character vectors directly,
#'   \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffDeparse
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffDeparse(matrix(1:9, 3), 1:9, pager="off")

setGeneric(
  name="diffDeparse",
  def=function(target, current, ...) standardGeneric("diffDeparse")
)

#' @rdname diffDeparse

setMethod(
  f="diffDeparse",
  signature=c("ANY", "ANY"),
  definition=make_diff_fun(capt_deparse)
)

#' Diff Files
#'
#' Reads text files with \code{\link{readLines}} and performs a diff on the
#' resulting character vectors.
#'
#' @param target character(1L) or file connection with read capability; if
#'   character should point to a text file
#' @param current like \code{target}
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#'   \code{\link{diffObj}}, \code{\link{diffStr}},
#'   \code{\link{diffChr}} to compare character vectors directly,
#'   \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffFile
#' @examples
#' \dontrun{
#' url.base <- "https://raw.githubusercontent.com/wch/r-source"
#' f1 <- file.path(url.base, "29f013d1570e1df5dc047fb7ee304ff57c99ea68/README")
#' f2 <- file.path(url.base, "daf0b5f6c728bd3dbcd0a3c976a7be9beee731d9/README")
#' diffFile(f1, f2)
#' }

setGeneric(
  name="diffFile",
  def=function(target, current, ...) standardGeneric("diffFile")
)

#' @rdname diffFile

setMethod(
  f="diffFile",
  signature=c("ANY", "ANY"),
  definition=make_diff_fun(capt_file)
)

#' Diff CSV Files
#'
#' Reads CSV files with \code{\link{read.csv}} and passes the resulting data
#' frames onto \code{\link{diffPrint}}. \code{extra} values are passed as
#' arguments to both \code{read.csv} and \code{print}. To the
#' extent you wish to use different \code{extra} arguments for each of those
#' functions you will need to \code{read.csv} the files and pass them to
#' \code{diffPrint} yourself.
#'
#' @param target character(1L) or file connection with read capability;
#'   if character should point to a CSV file
#' @param current like \code{target}
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#'   \code{\link{diffObj}}, \code{\link{diffStr}},
#'   \code{\link{diffChr}} to compare character vectors directly,
#'   \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffCsv
#' @examples
#' iris.2 <- iris
#' iris.2$Sepal.Length[5] <- 99
#' f1 <- tempfile()
#' f2 <- tempfile()
#' write.csv(iris, f1, row.names=FALSE)
#' write.csv(iris.2, f2, row.names=FALSE)
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffCsv(f1, f2, pager="off")
#' unlink(c(f1, f2))

setGeneric(
  "diffCsv", def=function(target, current, ...) standardGeneric("diffCsv")
)

#' @rdname diffCsv

setMethod(
  "diffCsv", signature=c("ANY", "ANY"), definition=make_diff_fun(capt_csv)
)

#' Diff Objects
#'
#' Compare either the \code{print}ed or \code{str} screen representation of
#' R objects depending on which is estimated to produce the most useful
#' diff.  The selection process tries to minimize screen lines while maximizing
#' differences shown subject to display constraints.  The decision algorithm is
#' likely to evolve over time, so do not rely on this function making
#' a particular selection under specific circumstances.  Instead, use
#' \code{\link{diffPrint}} or \code{\link{diffStr}} if you require one or the
#' other output.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} methods,
#'   \code{\link{diffStr}},
#'   \code{\link{diffChr}} to compare character vectors directly,
#'   \code{\link{diffDeparse}} to compare deparsed objects,
#'   \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffObj(letters, c(letters[1:10], LETTERS[11:26]), pager="off")
#' with(mtcars, diffObj(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))

setGeneric(
  "diffObj", def=function(target, current, ...) standardGeneric("diffObj")
)

# Borrow the formals shared by every `diff*` method from the factory; the
# body it produces is discarded and replaced right below.

diff_obj <- make_diff_fun(identity)
body(diff_obj) <- quote({
  if(length(extra) > 0L)
    stop("Argument `extra` must be empty in `diffObj`.")

  # Snapshot the arguments before any local variable is created, so the list
  # holds nothing but what the caller supplied (or the defaults).  This must
  # remain the first assignment in the body.  Historically this ran inside a
  # `local` call, which was abandoned after problems appeared with the JIT.
  #
  # NOTE(review): unlike the factory-built methods, `frame` is not forced
  # explicitly here; presumably the snapshot below takes care of it - confirm.

  arg.list <- as.list(environment())
  call.info <- extract_call(sys.calls(), frame)
  err <- make_err_fun(call.info$call)

  # Default banners are the quoted target / current expressions from the call

  if(is.null(arg.list[["tar.banner"]]))
    arg.list[["tar.banner"]] <- call("quote", call.info$tar)
  if(is.null(arg.list[["cur.banner"]]))
    arg.list[["cur.banner"]] <- call("quote", call.info$cur)

  # The `diffStr` call is the `diffPrint` call with the function swapped and
  # `extra` overridden; both are evaluated in `frame`, print first

  call.print <- as.call(c(list(quote(diffobj::diffPrint)), arg.list))
  call.str <- call.print
  call.str[[1L]] <- quote(diffobj::diffStr)
  call.str[["extra"]] <- list(max.level="auto")

  res.print <- try(eval(call.print, frame), silent=TRUE)
  res.str <- try(eval(call.str, frame), silent=TRUE)

  # Failures are reported through `err`, the `diffStr` one taking precedence

  if(inherits(res.str, "try-error")) {
    err(
      "Error in calling `diffStr`: ",
      conditionMessage(attr(res.str, "condition"))
    )
  }
  if(inherits(res.print, "try-error")) {
    err(
      "Error in calling `diffPrint`: ",
      conditionMessage(attr(res.print, "condition"))
    )
  }
  # Both diffs are available at this point; gather the measurements used to
  # pick between them: hunks with differences, length of the diff output, and
  # line coverage as reported by `lineCoverage`

  hunks.print <- count_diff_hunks(res.print@diffs)
  hunks.str <- count_diff_hunks(res.str@diffs)
  len.print <- diff_line_len(
    res.print@diffs, res.print@etc,
    tar.capt=res.print@tar.dat$raw, cur.capt=res.print@cur.dat$raw
  )
  len.str <- diff_line_len(
    res.str@diffs, res.str@etc,
    tar.capt=res.str@tar.dat$raw, cur.capt=res.str@cur.dat$raw
  )
  cover.print <- lineCoverage(res.print)
  cover.str <- lineCoverage(res.str)

  # If only one of the two shows differences, that one wins

  if(!hunks.str && hunks.print) return(res.print)
  if(!hunks.print && hunks.str) return(res.str)

  # If one fits in full and the other had to be trimmed, show the full one

  trim.str <- res.str@trim.dat$lines[[1L]]
  trim.print <- res.print@trim.dat$lines[[1L]]

  if(!trim.str && trim.print) return(res.str)
  if(trim.str && !trim.print) return(res.print)

  # Always use print if the print output is of reasonable size

  if(len.print <= console_lines() / 2) return(res.print)

  # Otherwise weigh differences shown against lines used; ties go to print.
  # NOTE(review): a zero length would make a score NaN and the comparison
  # fail; presumably not reachable in this branch - confirm.

  score.str <- hunks.str / len.str * cover.str
  score.print <- hunks.print / len.print * cover.print

  if(score.print >= score.str) res.print else res.str
})
#' @export
setMethod("diffObj", signature=c("ANY", "ANY"), definition=diff_obj)