1#!/usr/bin/env perl 2##!/usr/bin/perl -w 3# latexdiff - differences two latex files on the word level 4# and produces a latex file with the differences marked up. 5# 6# Copyright (C) 2004-20 F J Tilmann (tilmann@gfz-potsdam.de) 7# 8# Repository/issue tracker: https://github.com/ftilmann/latexdiff 9# CTAN page: http://www.ctan.org/pkg/latexdiff 10# 11# This program is free software: you can redistribute it and/or modify 12# it under the terms of the GNU General Public License as published by 13# the Free Software Foundation, either version 3 of the License, or 14# (at your option) any later version. 15# 16# This program is distributed in the hope that it will be useful, 17# but WITHOUT ANY WARRANTY; without even the implied warranty of 18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19# GNU General Public License for more details. 20# You should have received a copy of the GNU General Public License 21# along with this program. If not, see <http://www.gnu.org/licenses/>. 22# 23# Detailed usage information at the end of the file 24# 25 26# Version 1.3.1.1 27# - remove spurious \n to fix error: Unknown regexp modifier "/n" at .../latexdiff line 1974, near "=~ " (see github issue #201) 28# 29# Version 1.3.1 30# Bug fixes: 31# - remove some uninitialised variable $2 warnings in string substitution in flatten function in case included file is not found 32# - add minimal postprocessing to diff processing of preamble commands (replace \RIGHTBRACE by \} ) 33# - pre-processing: replace (contributed) routine take_comments_and_enter_from_frac() with take_comments_and_newline_from_frac(), which does the same thing 34# (remove whitespace characters and comments between the argument of \frac commands) in an easier and more robust way. In addition, it 35# will replace commands like \frac12 with \frac{1}{2} as pre-processing step. Fixes issue #184 36# - add "intertext" to list of unsafe math commands @UNSAFEMATHCMD . 
Fixes issue #179 37# - provide citation command patterns for biblatex and protect them with mbox'es. Fixes issue #199 38# - hardcode number of parameters for \href and \url commands to allow spaces between commands and arguments even if --allow-spaces option is not used (this 39# is needed because some bibliography styles add such in-command-sequence spaces) Fixes issues: #178 #198 40# - bibitem is now kept even in deleted blocks such that deleted references show up properly (this implies that the actual numbers in numerical referencing schemes will change) 41# (this is implemented by introducing a new class of commands KEEPCMD , which are kept as is in deleted environments (no effect in added environments). Currently 42# \bibitem is hardwired to be the only member of this class (fixes issue #194, #174) 43# Features: 44# - add some special processing for revtex bibliography commands, so that the spaces between bibliography commands \bibfield and \bibinfo and their arguments are ignored. 45# (fixes issue #194, should fix #174) 46# 47# Version 1.3.0 (7 October 2018): 48# - treat options to \documentclass as potential package names (some packages allow implicit loading of or imply selected packages 49# - improved pattern matching: now allows nested angular brackets, and is no longer confused by escaped curly braces 50# - improved pattern matching in COARSE mode: occasionally, the closing bracket or some other elements would be matched in an 'unnatural' way due to another sequence being more minimal in the computational sense, sometimes even causing errors due to tokens moving in or out of the scope of math environments. This is now discouraged by adding internal \DIFANCHOR commands (which are removed again in post-processing) (fixes issues reported via email by li_ruomeng . 
51# - verbatim and lstlisting environments are marked-up with line-by-line in a similar style to non-verbatim text (requires the listing package to be installed) 52# (see new configuration variable VERBATIMLINEENV) (several issues and pull requests by jprotze) 53# - --flatten: now supports \verbatiminput and \lstlistinput 54# - --flatten: if file is not found, do not fail, simply warn and leave command unexpanded (inspired by issue #112). Don't warn if file name contains #[0-9] as it is then most likely an argument within a command definition rather than an actual file (applies to \input, \subfile, \include commands) 55# - added to textcmds: \intertext 56# - new config variable CUSTOMDIFCMD to allow defining special versions of commands in added or deleted blocks (Pull request by github user jprotze) 57# - added option -no-links (mostly for use by latexdiff-vc in only-changes modes) (Pull request by github user jprotze) 58# - new option --filter-script to run both input through a pre-processing script (PR jasonmccsmith #167) 59# new option --no-filter-stderr to hide stderr output from filter-script (potentially dangerous, as this might hide malfunctioning of filter scripts) 60# - --flatten now can deal with imports made using the import package {PR jasonmccsmith #173) 61# Bug fixes: 62# - pattern matching of \verb and \lstinline commands had an error which meant they would trigger on commands beginning with \verb. 
63# - In description environments, mark up item descriptions by effectively treating the insides of item commands as text commands (fixes #161) 64# 65# 66# Version 1.2.1 (22 June 2017) 67# - add "DeclareOldFontCommand" to styles using \bf or \sf old style font commands (fixes issue #92 ) 68# - improved markup: process lstinline commands in listings package correctly 69# for styles using colour, \verb and \lstinline arguments are marked up with colour (blue for added, red for deleted) 70# - bug fix: protecting inline math expressions for mbox did not work as intended (see stack exchange question: http://tex.stackexchange.com/questions/359412/compiling-the-latexdiff-when-adding-a-subscript-before-a-pmatrix-environment-cau) 71# - bug fix: when deleted \item commands are followed immediately by unsafe commands, they were not restored properly 72# (thanks to J. Protze for pull request) (pull request #89) 73# - treat lstlisting and comment as equivalent to verbatim environment 74# make environments that are treated like verbatim environments configurable (config variable VERBATIMENV) 75# treat lstinline as equivalent to verb command 76# partially addresses issue #38 77# - refactoring: set default configuration variables in a hash, and those that correspond to lists 78# - feature: option --add-to-config used to amend configuration variables, which are regex pattern lists 79# - bug fix: deleted figures when endfloat package is activated 80# - bug fix: alignat environment now always processed correctly (fix issues #65) 81# - bug fix: avoid processing of commands as potential files in routine init_regex_arr (fix issue #70 ) 82# - minimal feature enhancement: treat '@' as allowed character in commands (strictly speaking requires prior \makeatletter statement, but always assuming it to be 83# @ a letter if it is part of a command name will usually lead to the correct behaviour (see http://tex.stackexchange.com/questions/346651/latexdiff-and-let) 84# - new feature/bug fix: 
--flatten option \endinput in included files now respected but only if \endinput stands right at the beginning of the line (issue #77) 85# - bug fix: flatten would incorrectly attempt to process commented out \include commands (from discussion in issue #77 ) 86# - introduce an invisible space (\hspace{0pt} after \mbox{..} auxiliary commands (not in math mode), to allow line breaks between added and deleted citations (change should not cause adverse behaviour otherwise) 87# 88# Version 1.2.0: 89# - highlight new and deleted figures 90# - bug fix in title mark-up. Previously deleted commands in title (such as \title, \author or \date) were marked up erroneously 91# - (minor) bug fixes in new 1.1.1 features: disabled label was commented out twice, additional spaces were introduced before list environment begin and end commands 92# - deprecation fix: left brace in RegEx now needs to be escaped 93# - add type PDFCOMMENT based on issue #49 submitted by github user peci1 (Martin Pecka) 94# - make utf8 the default encoding 95# 96# Version 1.1.1 97# - patch mhchem: allow ce in equations 98# - flatten now also expands \input etc. in the preamble (but not \usepackage!) 
99# - Better support for Japanese ( contributed by github user kshramt ) 100# - prevent duplicated verbatim hashes (patch contributed by github user therussianjig, issue #36) 101# - disable deleted label commands (fixes issue #31) 102# - introduce post-processing to reinstate most deleted environments and all needed item commands (fixes issue #1) 103# 104# Version 1.1.0 105# - treat diacritics (\",\', etc) as safe commands 106# - treat \_ and \& correctly as safe commands, even if used without spacing to the next word 107# - Add a BOLD markup type that sets added text in bold face (Contribution by Victor Zabalza via pull request ) 108# - add append-mboxsafecmd list option to be able to specify special safe commands which need to be surrounded by mbox to avoid breaking (mostly this is needed with ulem package) 109# - support for siunitx and cleveref packages: protect \SI command in siunitx package and \cref,\Cref{range}{*} in cleveref packages (thanks to Stefan Pinnow for testing) 110# - experimental support for chemformula, mhchem packages: define \ch and \ce in packages as safe (but not \ch,\cee in equation array environments) - these unfortunately will not be marked up (thanks to Stefan Pinnow for testing) 111# - bug fix: packages identified correctly even if \usepackage command options extend over several lines (previously \usepackage command needed to be fully contained in one line) 112# - new subtype ONLYCHANGEDPAGE outputs only changed pages (might not work well for floating material) 113# - new subtype ZLABEL operates similarly to LABEL but uses absolute page numbers (needs zref package) 114# - undocumented option --debug/--nodebug to override default setting for debug mode (Default: 0 for release version, 1: for development version 115# 116# Version 1.0.4 117# - introduce list UNSAFEMATHCMD, which holds list of commands which cannot be marked up with \DIFadd or \DIFdel commands (only relevant for WHOLE and COARSE math markup modes) 118# - new subtype LABEL 
which gives each change a label. This can later be used to only display pages where changes 119# have been made (instructions for that are put as comments into the diff'ed file) inspired by answer on http://tex.stackexchange.com/questions/166049/invisible-markers-in-pdfs-using-pdflatex 120# - Configuration variables take into account some commands from additional packages: 121# tikzpicture environment now treated as PICTUREENV, and \smallmatrix in ARRENV (amsmath) 122# - --flatten: support for \subfile command (subfiles package) (in response to http://tex.stackexchange.com/questions/167620/latexdiff-with-subfiles ) 123# - --flatten: \bibliography commands expand if corresponding bbl file present 124# - angled bracket optional commands now parsed correctly (patch #3570) submitted by Dave Kleinschmidt (thanks) 125# - \RequirePackage now treated as synonym of \usepackage with respect to setting packages 126# - special rules for apacite package (redefine citation commands) 127# - recognise /dev/null as 'file-like' arguments for --preamble and --config options 128# - fix units package incompatibility with ulem for text maths statements $ ..$ (thanks to Stuart Prescott for reporting this) 129# - amsmath environment cases treated correctly (Bug fix #19029) (thanks to Jalar) 130# - {,} in comments no longer confuse latexdiff (Bug fix #19146) 131# - \% in one-letter sub/Superscripts was not converted correctly 132# 133# Version 1.0.3 134# - fix bug in add_safe_commands that made latexdiff hang on DeclareMathOperator 135# command in preamble 136# - \(..\) inline math expressions were not parsed correctly, if they contained a linebreak 137# - applied patch contributed by tomflannaghan via Berlios: [ Patch #3431 ] Adds correct handling of \left< and \right> 138# - \$ is treated correctly as a literal dollar sign (thanks to Reed Cartwright and Joshua Miller for reporting this bug 139# and sketching out the solution) 140# - \^ and \_ are correctly interpreted as accent and 
underlined space, respectively, not as superscript of subscript 141# (thanks to Wail Yahyaoui for pointing out this bug) 142# 143# Version 1.0.1 - treat \big,\bigg etc. equivalently to \left and 144# \right - include starred version in MATHENV - apply 145# - flatten recursively and --flatten expansion is now 146# aware of comments (thanks to Tim Connors for patch) 147# - Change to post-processing for more reliability for 148# deleted math environments 149# - On linux systems, recognise and remove DOS style newlines 150# - Provide markup for some special preamble commands (\title, 151# \author,\date, 152# - configurable by setting context2cmd 153# - for styles using ulem package, remove \emph and \text.. from list of 154# safe commands in order to allow linebreaks within the 155# highlighted sections. 156# - for ulem style, now show citations by enclosing them in \mbox commands. 157# This unfortunately implies linebreaks within citations no longer function, 158# so this functionality can be turned off (Option --disable-citation-markup). 159# With --enable-citation-markup, the mbox markup is forced for other styles) 160# - new substyle COLOR. This is particularly useful for marking up citations 161# and some special post-processing is implemented to retain cite 162# commands in deleted blocks. 163# - four different levels of math-markup 164# - Option --driver for choosing driver for modes employing changebar package 165# - accept \\* as valid command (and other commands of form \.*). 
Also accept 166# \<nl> (backslashed newline) 167# - some typo fixes, include commands defined in preamble as safe commands 168# (Sebastian Gouezel) 169# - include compared filenames as comments as line 2 and 3 of 170# the preamble (can be modified with option --label, and suppressed with 171# --no-label), option --visible-label to show files in generated pdf or dvi 172# at the beginning of main document 173# 174# Version 0.5 A number of minor improvements based on feedback 175# Deleted blocks are now shown before added blocks 176# Package specific processing 177# 178# Version 0.43 unreleased typo in list of styles at the end 179# Add protect to all \cbstart, \cbend commands 180# More robust substitution of deleted math commands 181# 182# Version 0.42 November 06 Bug fixes only 183# 184# Version 0.4 March 06 option for fast differencing using UNIX diff command, several minor bug fixes (\par bug, improved highlighting of textcmds) 185# 186# Version 0.3 August 05 improved parsing of displayed math, --allow-spaces 187# option, several minor bug fixes 188# 189# Version 0.25 October 04 Fix bug with deleted equations, add math mode commands to safecmd, add | to allowed interpunctuation signs 190# Version 0.2 September 04 extension to utf-8 and variable encodings 191# Version 0.1 August 04 First public release 192 193# Inserted block for differencing 194# use Algorithm::Diff qw(traverse_sequences); 195# in standard version 196# The following BEGIN block contains a verbatim copy of 197# Ned Konz' Algorithm::Diff package version 1.15 except 198# that subroutine _longestCommonSubsequence has been replaced by 199# a routine which internally uses the UNIX diff command for 200# the differencing rather than the Perl routines if the 201# length of the sequences exceeds some threshold. 202# Also, all POD documentation has been stripped out. 
#
# (the distribution on which this modification is based is available
# from http://search.cpan.org/~nedkonz/Algorithm-Diff-1.15
# the most recent version can be found via http://search.cpan.org/search?module=Algorithm::Diff )
# Please note the LICENCE for Algorithm::Diff :
# "Copyright (c) 2000-2002 Ned Konz. All rights reserved.
# This program is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself."
# The fast-differencing version of latexdiff is provided as a convenience
# for latex users under Unix-like systems which have a 'diff' command.
# If you believe
# the inlining of Algorithm::Diff violates its license please contact
# me and I will modify the latexdiff distribution accordingly.
# Frederik Tilmann (tilmann@esc.cam.ac.uk)
# Jonathan Paisley is acknowledged for the idea of using the system diff
# command to achieve shorter running times
BEGIN {
package Algorithm::Diff;
use strict;
use vars qw($VERSION @EXPORT_OK @ISA @EXPORT);
use integer;    # see below in _replaceNextLargerWith() for mod to make
                # if you don't use this
require Exporter;
@ISA       = qw(Exporter);
@EXPORT    = qw();
@EXPORT_OK = qw(LCS diff traverse_sequences traverse_balanced sdiff);
$VERSION   = sprintf('%d.%02d fast', (q$Revision: 1.15 $ =~ /\d+/g));

# Global parameters

use File::Temp qw/tempfile/;
# if larger number of elements in longestCommonSubsequence smaller than
# this number, then use internal algorithm, otherwise use UNIX diff
use constant THRESHOLD => 100 ;
# Detect whether diff --minimal option is available
# if yes we use it
use constant MINIMAL => ( system('diff','--minimal','/dev/null','/dev/null') >> 8 ==0 ? "--minimal" : "" ) ;



# McIlroy-Hunt diff algorithm
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
# by Ned Konz, perl@bike-nomad.com


# Create a hash that maps each element of $aCollection to the set of positions
# it occupies in $aCollection, restricted to the elements within the range of
# indexes specified by $start and $end.
# The fourth parameter is a subroutine reference that will be called to
# generate a string to use as a key.
# Additional parameters, if any, will be passed to this subroutine.
#
# Positions are stored in descending order (each new index is unshifted onto
# the front of the list for its key).
# Returns the hash in list context and a reference to it in scalar context.
#
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );

sub _withPositionsOfInInterval
{
    my $aCollection = shift;    # array ref
    my $start       = shift;
    my $end         = shift;
    my $keyGen      = shift;
    my %d;
    my $index;
    for ( $index = $start ; $index <= $end ; $index++ )
    {
        my $element = $aCollection->[$index];
        my $key = &$keyGen( $element, @_ );
        if ( exists( $d{$key} ) )
        {
            unshift ( @{ $d{$key} }, $index );
        }
        else
        {
            $d{$key} = [$index];
        }
    }
    return wantarray ? %d : \%d;
}

# Find the place at which aValue would normally be inserted into the array. If
# that place is already occupied by aValue, do nothing, and return undef. If
# the place does not exist (i.e., it is off the end of the array), add it to
# the end, otherwise replace the element at that point with aValue.
# It is assumed that the array's values are numeric.
# This is where the bulk (75%) of the time is spent in this module, so try to
# make it fast!
#
# $high is an optional upper bound for the binary search; a false value
# (undef or 0) means "search the whole array".
# Returns the index at which aValue was stored, or undef if it was already there.

sub _replaceNextLargerWith
{
    my ( $array, $aValue, $high ) = @_;
    $high ||= $#$array;

    # off the end?
    if ( $high == -1 || $aValue > $array->[-1] )
    {
        push ( @$array, $aValue );
        return $high + 1;
    }

    # binary search for insertion point...
    my $low = 0;
    my $index;
    my $found;
    while ( $low <= $high )
    {
        $index = ( $high + $low ) / 2;

        # $index = int(( $high + $low ) / 2);  # without 'use integer'
        $found = $array->[$index];

        if ( $aValue == $found )
        {
            return undef;
        }
        elsif ( $aValue > $found )
        {
            $low = $index + 1;
        }
        else
        {
            $high = $index - 1;
        }
    }

    # now insertion point is in $low.
    $array->[$low] = $aValue;    # overwrite next larger
    return $low;
}

# This method computes the longest common subsequence in $a and $b.

# Result is array or ref, whose contents is such that
#   $a->[ $i ] == $b->[ $result[ $i ] ]
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.

# An additional argument may be passed; this is a hash or key generating
# function that should return a string that uniquely identifies the given
# element.  It should be the case that if the key is the same, the elements
# will compare the same. If this parameter is undef or missing, the key
# will be the element as a string.

# By default, comparisons will use "eq" and elements will be turned into keys
# using the default stringizing operator '""'.

# Additional parameters, if any, will be passed to the key generation routine.

# If the longer of the two sequences has fewer than THRESHOLD elements (by
# last index) the internal Perl algorithm is used. Otherwise the keys of both
# sequences are written one per line to two temporary files, the system
# 'diff' command is run on them, and its normal-format output ("NcM", "NaM",
# "NdM" hunk headers) is parsed to fill in the match vector.
# Dies if the diff output cannot be interpreted consistently or if the diff
# process fails.

sub _longestCommonSubsequence
{
    my $a      = shift;    # array ref
    my $b      = shift;    # array ref
    my $keyGen = shift;    # code ref
    my $compare;           # code ref

    # set up code refs
    # Note that these are optimized.
    if ( !defined($keyGen) )    # optimize for strings
    {
        $keyGen = sub { $_[0] };
        $compare = sub { my ( $a, $b ) = @_; $a eq $b };
    }
    else
    {
        $compare = sub {
            my $a = shift;
            my $b = shift;
            &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
        };
    }

    my ( $aStart, $aFinish, $bStart, $bFinish, $matchVector ) =
      ( 0, $#$a, 0, $#$b, [] );

    # Check whether to use internal routine (small number of elements)
    # or use it as a wrapper for UNIX diff
    if ( ( $#$a > $#$b ? $#$a : $#$b) < THRESHOLD ) {
        ### print STDERR "DEBUG: regular longestCommonSubsequence\n";
        # First we prune off any common elements at the beginning
        while ( $aStart <= $aFinish
            and $bStart <= $bFinish
            and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
        {
            $matchVector->[ $aStart++ ] = $bStart++;
        }

        # now the end
        while ( $aStart <= $aFinish
            and $bStart <= $bFinish
            and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
        {
            $matchVector->[ $aFinish-- ] = $bFinish--;
        }

        # Now compute the equivalence classes of positions of elements
        my $bMatches =
          _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
        my $thresh = [];
        my $links  = [];

        my ( $i, $ai, $j, $k );
        for ( $i = $aStart ; $i <= $aFinish ; $i++ )
        {
            $ai = &$keyGen( $a->[$i], @_ );
            if ( exists( $bMatches->{$ai} ) )
            {
                $k = 0;
                for $j ( @{ $bMatches->{$ai} } )
                {

                    # optimization: most of the time this will be true
                    if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
                    {
                        $thresh->[$k] = $j;
                    }
                    else
                    {
                        $k = _replaceNextLargerWith( $thresh, $j, $k );
                    }

                    # oddly, it's faster to always test this (CPU cache?).
                    if ( defined($k) )
                    {
                        $links->[$k] =
                          [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
                    }
                }
            }
        }

        if (@$thresh)
        {
            for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
            {
                $matchVector->[ $link->[1] ] = $link->[2];
            }
        }
    }
    else {
        my ($fha,$fhb,$fna,$fnb,$ele,$key);
        my ($alines,$blines,$alb,$alf,$blb,$blf);
        my ($minimal)=MINIMAL;
        my $diffPipe;
        # large number of elements, use system diff
        ### print STDERR "DEBUG: fast (diff) longestCommonSubsequence\n";

        # UNLINK => 1 registers the files for removal at program exit, so they
        # do not linger in the working directory if one of the consistency
        # checks below dies. On success they are removed explicitly further down.
        ($fha,$fna)=tempfile("DiffA-XXXX", UNLINK => 1) or die "_longestCommonSubsequence: Cannot open tempfile for sequence A";
        ($fhb,$fnb)=tempfile("DiffB-XXXX", UNLINK => 1) or die "_longestCommonSubsequence: Cannot open tempfile for sequence B";
        # prepare sequence A
        # (one key per line; backslashes and newlines inside a key are escaped
        #  so that every key occupies exactly one line)
        foreach $ele ( @$a ) {
            $key=&$keyGen( $ele, @_ );
            $key =~ s/\\/\\\\/g ;
            $key =~ s/\n/\\n/sg ;
            print $fha "$key\n" ;
        }
        close($fha);
        # prepare sequence B
        foreach $ele ( @$b ) {
            $key=&$keyGen( $ele, @_ );
            $key =~ s/\\/\\\\/g ;
            $key =~ s/\n/\\n/sg ;
            print $fhb "$key\n" ;
        }
        close($fhb);

        open($diffPipe, "diff $minimal $fna $fnb |") or die "_longestCommonSubsequence: Cannot launch diff process. $!" ;
        # The diff line numbering begins with 1, but Perl subscripts start with 0
        # We follow the diff numbering but subtract 1 when assigning to matchVector
        $aStart++; $bStart++ ; $aFinish++ ; $bFinish++ ;
        while( <$diffPipe> ) {
            if ( ($alines,$blines) = ( m/^(\d*(?:,\d*)?)?c(\d*(?:,\d*)?)?$/ ) ) {
                # changed block: everything before it is matched one-to-one
                ($alb,$alf)=split(/,/,$alines);
                ($blb,$blf)=split(/,/,$blines);
                $alf=$alb unless defined($alf);
                $blf=$blb unless defined($blf);
                while($aStart < $alb ) {
                    $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
                }
                # check for consistency
                $bStart==$blb or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in changed sequence";
                $aStart=$alf+1;
                $bStart=$blf+1;
            }
            elsif ( ($alb,$blines) = ( m/^(\d*)a(\d*(?:,\d*)?)$/ ) ) {
                # lines appended in B after line $alb of A
                ($blb,$blf)=split(/,/,$blines);
                $blf=$blb unless defined($blf);
                while ( $bStart < $blb ) {
                    $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
                }
                $aStart==$alb+1 or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in appended sequence near elements $aStart and $bStart";
                $bStart=$blf+1;
            }
            elsif ( ($alines,$blb) = ( m/^(\d*(?:,\d*)?)d(\d*)$/ ) ) {
                # lines deleted from A
                ($alb,$alf)=split(/,/,$alines);
                $alf=$alb unless defined($alf);
                while ( $aStart < $alb ) {
                    $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
                }
                $bStart==$blb+1 or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in deleted sequence near elements $aStart and $bStart";
                $aStart=$alf+1;
            }
            elsif ( m/^Binary files/ ) {
                # if diff reports it is a binary file force --text mode. I do not like
                # to always use this option because it is probably only available in GNU diff
                # (re-opening the handle closes the first pipe; the loop then
                #  continues reading from the new diff process)
                open($diffPipe, "diff --text $fna $fnb |") or die "Cannot launch diff process. $!" ;
            }
            # Default: just skip line
        }
        while ($aStart <= $aFinish ) {
            $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
        }
        $bStart==$bFinish+1 or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency at end";
        # close() on a pipe returns false if a system call failed or if diff
        # exited with a non-zero status (in the latter case $! is reset to 0).
        # $! is only meaningful after a failed call: when close() succeeds
        # (diff exit status 0, i.e. identical sequences) it may still hold a
        # stale error code from an unrelated earlier operation, so it is only
        # consulted when close() reported failure.
        my $closeOk = close $diffPipe;
        # check whether a system error has occurred or return status is greater than 5
        if ( ( !$closeOk && $! ) || ($? >> 8) > 5) {
            print STDERR "diff process failed with exit code ", ($? >> 8), " $!\n";
            die;
        }
        unlink $fna,$fnb ;
    }
    return wantarray ? @$matchVector : $matchVector;
}

# Walk through sequences $a and $b in parallel, driven by their longest
# common subsequence, and report what is found through callbacks.
#
# Arguments: array ref A, array ref B, optional hash ref of callbacks,
# optional key generation function; any further arguments are passed on to
# the key generator and to every callback.
# Recognised callback keys:
#   MATCH      called with ( $ai, $bi, ... ) for an element common to both
#   DISCARD_A  called with ( $ai, $bi, ... ) for an element only in A
#   DISCARD_B  called with ( $ai, $bi, ... ) for an element only in B
#   A_FINISHED called once with ( $lastA, ... ) when A is exhausted but B is not
#   B_FINISHED called once with ( $lastB, ... ) when B is exhausted but A is not
# Missing MATCH/DISCARD_* callbacks default to no-ops. If a *_FINISHED
# callback is missing, the remaining elements of the other sequence are
# reported through the corresponding DISCARD_* callback.
# Always returns 1.

sub traverse_sequences
{
    my $a                 = shift;                                  # array ref
    my $b                 = shift;                                  # array ref
    my $callbacks         = shift || {};
    my $keyGen            = shift;
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
    my $finishedACallback = $callbacks->{'A_FINISHED'};
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
    my $finishedBCallback = $callbacks->{'B_FINISHED'};
    my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );

    # Process all the lines in @$matchVector
    my $lastA = $#$a;
    my $lastB = $#$b;
    my $bi    = 0;
    my $ai;

    for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
    {
        my $bLine = $matchVector->[$ai];
        if ( defined($bLine) )    # matched
        {
            &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
            &$matchCallback( $ai, $bi++, @_ );
        }
        else
        {
            &$discardACallback( $ai, $bi, @_ );
        }
    }

    # The last entry (if any) processed was a match.
    # $ai and $bi point just past the last matching lines in their sequences.

    while ( $ai <= $lastA or $bi <= $lastB )
    {

        # last A?
        if ( $ai == $lastA + 1 and $bi <= $lastB )
        {
            if ( defined($finishedACallback) )
            {
                &$finishedACallback( $lastA, @_ );
                $finishedACallback = undef;
            }
            else
            {
                &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
            }
        }

        # last B?
        if ( $bi == $lastB + 1 and $ai <= $lastA )
        {
            if ( defined($finishedBCallback) )
            {
                &$finishedBCallback( $lastB, @_ );
                $finishedBCallback = undef;
            }
            else
            {
                &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
            }
        }

        &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
        &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
    }

    return 1;
}

# Like traverse_sequences, but between two matches elements of A and B are
# paired up: while both sequences have unmatched elements pending, the CHANGE
# callback is called with ( $ai, $bi, ... ) and both indices advance.
# If no CHANGE callback is given, DISCARD_A followed by DISCARD_B is called
# instead. Elements left over on one side only are reported through
# DISCARD_A or DISCARD_B. There are no *_FINISHED callbacks.
# Always returns 1.

sub traverse_balanced
{
    my $a                 = shift;                                  # array ref
    my $b                 = shift;                                  # array ref
    my $callbacks         = shift || {};
    my $keyGen            = shift;
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
    my $changeCallback    = $callbacks->{'CHANGE'};
    my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );

    # Process all the lines in match vector
    my $lastA = $#$a;
    my $lastB = $#$b;
    my $bi    = 0;
    my $ai    = 0;
    my $ma    = -1;
    my $mb;

    while (1)
    {

        # Find next match indices $ma and $mb
        do { $ma++ } while ( $ma <= $#$matchVector && !defined $matchVector->[$ma] );

        last if $ma > $#$matchVector;    # end of matchVector?
        $mb = $matchVector->[$ma];

        # Proceed with discard a/b or change events until
        # next match
        while ( $ai < $ma || $bi < $mb )
        {

            if ( $ai < $ma && $bi < $mb )
            {

                # Change
                if ( defined $changeCallback )
                {
                    &$changeCallback( $ai++, $bi++, @_ );
                }
                else
                {
                    &$discardACallback( $ai++, $bi, @_ );
                    &$discardBCallback( $ai, $bi++, @_ );
                }
            }
            elsif ( $ai < $ma )
            {
                &$discardACallback( $ai++, $bi, @_ );
            }
            else
            {

                # $bi < $mb
                &$discardBCallback( $ai, $bi++, @_ );
            }
        }

        # Match
        &$matchCallback( $ai++, $bi++, @_ );
    }

    while ( $ai <= $lastA || $bi <= $lastB )
    {
        if ( $ai <= $lastA && $bi <= $lastB )
        {

            # Change
            if ( defined $changeCallback )
            {
                &$changeCallback( $ai++, $bi++, @_ );
            }
            else
            {
                &$discardACallback( $ai++, $bi, @_ );
                &$discardBCallback( $ai, $bi++, @_ );
            }
        }
        elsif ( $ai <= $lastA )
        {
            &$discardACallback( $ai++, $bi, @_ );
        }
        else
        {

            # $bi <= $lastB
            &$discardBCallback( $ai, $bi++, @_ );
        }
    }

    return 1;
}

# Return the elements of $a that belong to the longest common subsequence of
# $a and the second sequence (remaining arguments are handed unchanged to
# _longestCommonSubsequence: array ref B, optional key generator, extras).
# Returns a list in list context and an array ref in scalar context.

sub LCS
{
    my $a = shift;                                           # array ref
    my $matchVector = _longestCommonSubsequence( $a, @_ );
    my @retval;
    my $i;
    for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
    {
        if ( defined( $matchVector->[$i] ) )
        {
            push ( @retval, $a->[$i] );
        }
    }
    return wantarray ? @retval : \@retval;
}

# Compute the differences between $a and $b as a list of hunks.
# Each hunk is an array ref of entries [ '-', index_in_a, element ] or
# [ '+', index_in_b, element ]; a hunk is closed whenever a matching element
# is encountered. Remaining arguments are passed to traverse_sequences
# (as key generator and extra arguments).
# Returns a list of hunks in list context and an array ref in scalar context.

sub diff
{
    my $a      = shift;    # array ref
    my $b      = shift;    # array ref
    my $retval = [];
    my $hunk   = [];
    my $discard = sub { push ( @$hunk, [ '-', $_[0], $a->[ $_[0] ] ] ) };
    my $add = sub { push ( @$hunk, [ '+', $_[1], $b->[ $_[1] ] ] ) };
    my $match = sub { push ( @$retval, $hunk ) if scalar(@$hunk); $hunk = [] };
    traverse_sequences( $a, $b,
        { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
    # flush the last hunk, if any
    &$match();
    return wantarray ? @$retval : $retval;
}

# Compute a side-by-side comparison of $a and $b.
# The result has one entry [ flag, element_of_a, element_of_b ] per step, with
# flag 'u' (unchanged), 'c' (changed), '-' (only in A; B side is "") or
# '+' (only in B; A side is ""). Remaining arguments are passed to
# traverse_balanced (as key generator and extra arguments).
# Returns a list in list context and an array ref in scalar context.

sub sdiff
{
    my $a      = shift;    # array ref
    my $b      = shift;    # array ref
    my $retval = [];
    my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
    my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
    my $change = sub {
        push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
    };
    my $match = sub {
        push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
    };
    traverse_balanced(
        $a,
        $b,
        {
            MATCH     => $match,
            DISCARD_A => $discard,
            DISCARD_B => $add,
            CHANGE    => $change,
        },
        @_
    );
    return wantarray ? @$retval : $retval;
}

1;
}
import Algorithm::Diff qw(traverse_sequences);
# End of inserted block for stand-alone version


use Getopt::Long ;
use strict ;
use warnings;
use utf8 ;

use File::Spec ;

my ($algodiffversion)=split(/ /,$Algorithm::Diff::VERSION);


my ($versionstring)=<<EOF ;
This is LATEXDIFF 1.3.1.1 (Algorithm::Diff $Algorithm::Diff::VERSION, Perl $^V)
 (c) 2004-2020 F J Tilmann
EOF

# Hash with defaults for configuration variables. These marked undef have default values constructed from list defined in the DATA block
# (under tag CONFIG)
my %CONFIG=(
  MINWORDSBLOCK => 3, # minimum number of tokens to form an independent block
                      # shorter identical blocks will be merged to the previous word
  SCALEDELGRAPHICS => 0.5, # factor with which deleted figures will be scaled down (i.e. 
0.5 implies they are shown at half linear size) 777 # this is only used for --graphics-markup=BOTH option 778 FLOATENV => undef , # Environments in which FL variants of defined commands are used 779 PICTUREENV => undef , # Environments in which all change markup is removed 780 MATHENV => undef , # Environments turning on display math mode (code also knows about \[ and \]) 781 MATHREPL => 'displaymath', # Environment introducing deleted maths blocks 782 MATHARRENV => undef , # Environments turning on eqnarray math mode 783 MATHARRREPL => 'eqnarray*', # Environment introducing deleted maths blocks 784 ARRENV => undef , # Environments making arrays in math mode. The underlining style does not cope well with those - as a result in-text math environments are surrounded by \mbox{ } if any of these commands is used in an inline math block 785 COUNTERCMD => undef, 786 # COUNTERCMD textcmds which are associated with a counter 787 # If any of these commands occur in a deleted block 788 # they will be followed by an \addtocounter{...}{-1} 789 # for the associated counter such that the overall numbers 790 # should be the same as in the new file 791 LISTENV => undef , # list making environments - they will generally be kept 792 VERBATIMENV => undef, # Environments whose content should be treated as verbatim text and not be touched 793 VERBATIMLINEENV => undef, # Environments whose content should be treated as verbatim text and processed in line diff mode 794 CUSTOMDIFCMD => undef,# Custom dif command. 
Is defined in the document as a \DELcommand and \ADDcommand version to be replaced by the diff 795 ITEMCMD => 'item' # command marking item in a list environment 796); 797# Configuration variables: these have to be visible from the subroutines 798my ($ARRENV, 799 $COUNTERCMD, 800 $FLOATENV, 801 $ITEMCMD, 802 $LISTENV, 803 $MATHARRENV, 804 $MATHARRREPL, 805 $MATHENV, 806 $MATHREPL, 807 $MINWORDSBLOCK, 808 $PICTUREENV, 809 $SCALEDELGRAPHICS, 810 $VERBATIMENV, 811 $VERBATIMLINEENV, 812 $CUSTOMDIFCMD 813 ); 814 815# my $MINWORDSBLOCK=3; # minimum number of tokens to form an independent block 816# # shorter identical blocks will be merged to the previous word 817# my $SCALEDELGRAPHICS=0.5; # factor with which deleted figures will be scaled down (i.e. 0.5 implies they are shown at half linear size) 818# # this is only used for --graphics-markup=BOTH option 819# my $FLOATENV='(?:figure|table|plate)[\w\d*@]*' ; # Environments in which FL variants of defined commands are used 820# my $PICTUREENV='(?:picture|tikzpicture|DIFnomarkup)[\w\d*@]*' ; # Environments in which all change markup is removed 821# my $MATHENV='(?:equation[*]?|displaymath|DOLLARDOLLAR)[*]?' ; # Environments turning on display math mode (code also knows about \[ and \]) 822# my $MATHREPL='displaymath'; # Environment introducing deleted maths blocks 823# my $MATHARRENV='(?:eqnarray|align|alignat|gather|multline|flalign)[*]?' ; # Environments turning on eqnarray math mode 824# my $MATHARRREPL='eqnarray*'; # Environment introducing deleted maths blocks 825# my $ARRENV='(?:aligned|gathered|array|[pbvBV]?matrix|smallmatrix|cases|split)'; # Environments making arrays in math mode. 
The underlining style does not cope well with those - as a result in-text math environments are surrounded by \mbox{ } if any of these commands is used in an inline math block 826# my $COUNTERCMD='(?:footnote|part|chapter|section|subsection|subsubsection|paragraph|subparagraph)'; # textcmds which are associated with a counter 827# # If any of these commands occur in a deleted block 828# # they will be succeeded by an \addtocounter{...}{-1} 829# # for the associated counter such that the overall numbers 830# # should be the same as in the new file 831# my $LISTENV='(?:itemize|description|enumerate)'; # list making environments - they will generally be kept 832# my $ITEMCMD='item'; # command marking item in a list environment 833 834 835 836my $LABELCMD='(?:label)'; # matching commands are disabled within deleted blocks - mostly useful for maths mode, as otherwise it would be fine to just not add those to SAFECMDLIST 837my @UNSAFEMATHCMD=('qedhere','intertext'); # Commands which are definitely unsafe for marking up in math mode (amsmath qedhere only tested to not work with UNDERLINE markup) (only affects WHOLE and COARSE math markup modes). Note that unlike text mode (or FINE math mode0 deleted unsafe commands are not deleted but simply taken outside \DIFdel 838my $MBOXINLINEMATH=0; # if set to 1 then surround marked-up inline maths expression with \mbox ( to get around compatibility 839 # problems between some maths packages and ulem package 840 841 842# Markup strings 843# If at all possible, do not change these as parts of the program 844# depend on the actual name (particularly post-processing) 845# At the very least adapt subroutine postprocess to new tokens. 
my $ADDMARKOPEN='\DIFaddbegin '; # Token to mark begin of appended text
my $ADDMARKCLOSE='\DIFaddend '; # Token to mark end of appended text
my $ADDOPEN='\DIFadd{'; # To mark begin of added text passage
my $ADDCLOSE='}'; # To mark end of added text passage
my $ADDCOMMENT='DIF > '; # To mark added comment line
my $DELMARKOPEN='\DIFdelbegin '; # Token to mark begin of deleted text
my $DELMARKCLOSE='\DIFdelend '; # Token to mark end of deleted text
my $DELOPEN='\DIFdel{'; # To mark begin of deleted text passage
my $DELCLOSE='}'; # To mark end of deleted text passage
my $DELCMDOPEN='%DIFDELCMD < '; # To mark begin of deleted commands (must begin with %, i.e., be a comment)
my $DELCMDCLOSE="%%%\n"; # To mark end of deleted commands (must end with a new line)
my $AUXCMD='%DIFAUXCMD' ; # follows auxiliary commands put in by latexdiff to make difference file legal
                          # auxiliary commands must be on a line of their own
                          # Note that for verbatim environment openings the %DIFAUXCMD cannot be placed in
                          # the same line as this would mean they are shown
                          # so the special form "%DIFAUXCMD NEXT" is used to indicate that the next line
                          # is an auxiliary command
                          # Similarly "%DIFAUXCMD LAST" would indicate the auxiliary command is in previous line (not currently used)
my $DELCOMMENT='DIF < '; # To mark deleted comment line
my $VERBCOMMENT='DIFVRB '; # To mark lines which are within a verbatim environment

# main local variables:
# (all lists start out empty here)
my @TEXTCMDLIST=(); # array containing patterns of commands with text arguments
my @TEXTCMDEXCL=(); # array containing patterns of commands without text arguments (if a pattern
                    # matches both TEXTCMDLIST and TEXTCMDEXCL it is excluded)
my @CONTEXT1CMDLIST=(); # array containing patterns of commands with text arguments (subset of text commands),
                        # but which cause confusion if used out of context (e.g. \caption).
# In deleted passages, the command will be disabled but its argument is marked up
# Otherwise they behave exactly like TEXTCMD's
my @CONTEXT1CMDEXCL=(); # exclude list for above, but always empty
my @CONTEXT2CMDLIST=(); # array containing patterns of commands with text arguments, but which fail or cause confusion
                        # if used out of context (e.g. \title). They and their arguments will be disabled in deleted
                        # passages
my @CONTEXT2CMDEXCL=(); # exclude list for above, but always empty
my @MATHTEXTCMDLIST=(); # treat like textcmd. If a textcmd is in deleted or added block, just wrap the
                        # whole content with \DIFadd or \DIFdel irrespective of content. This functionality
                        # is useful for pseudo commands \MATHBLOCK.. into which math environments are being
                        # transformed
my @MATHTEXTCMDEXCL=(); #

# Note I need to declare this with "our" instead of "my" because later in the code I have to "local"ise these
our @SAFECMDLIST=(); # array containing patterns of safe commands (which do not break when in the argument of DIFadd or DIFDEL)
our @SAFECMDEXCL=();
my @MBOXCMDLIST=(); # patterns for commands which are in principle safe but which need to be surrounded by an \mbox
my @MBOXCMDEXCL=(); # all the patterns in MBOXCMDLIST will be appended to SAFECMDLIST

my @KEEPCMDLIST=( qr/^bibitem$/ ); # patterns for commands which should not be deleted in nominally delete text passages
my @KEEPCMDEXCL=();

my ($i,$j,$l);
my ($old,$new);
my ($line,$key);
my (@dumlist);
my ($newpreamble,$oldpreamble);
my (@newpreamble,@oldpreamble,@diffpreamble,@diffbody);
my ($latexdiffpreamble);
my ($oldbody, $newbody, $diffbo);
my ($oldpost, $newpost);
my ($diffall);
# Option names
my ($type,$subtype,$floattype,$config,$preamblefile,$encoding,$nolabel,$visiblelabel,
    $filterscript,$ignorefilterstderr,
    $showpreamble,$showsafe,$showtext,$showconfig,$showall,
    $replacesafe,$appendsafe,$excludesafe,
    $replacetext,$appendtext,$excludetext,
    $replacecontext1,$appendcontext1,
    $replacecontext2,$appendcontext2,
    $help,$verbose,$driver,$version,$ignorewarnings,
    $enablecitmark,$disablecitmark,$allowspaces,$flatten,$nolinks,$debug,$earlylatexdiffpreamble);  ###$disablemathmark,
my ($mboxsafe);
# MNEMONICS for mathmarkup
my $mathmarkup;
use constant {
  OFF => 0,
  WHOLE => 1,
  COARSE => 2,
  FINE => 3
};
# MNEMONICS for graphicsmarkup
my $graphicsmarkup;
use constant {
  NONE => 0,
  NEWONLY => 1,
  BOTH => 2
};

my ($mboxcmd);

my (@configlist,@addtoconfiglist,@labels,
    @appendsafelist,@excludesafelist,
    @appendmboxsafelist,@excludemboxsafelist,
    @appendtextlist,@excludetextlist,
    @appendcontext1list,@appendcontext2list,
    @packagelist);
my ($assign,@config);
# Hash where keys corresponds to the names of all included packages (including the documentclass as another package
# the optional arguments to the package are the values of the hash elements
my ($pkg,%packages);

# Defaults
$mathmarkup=COARSE;
$verbose=0;
# output debug and intermediate files, set to 0 in final distribution
$debug=0;
# insert preamble directly after documentclass - experimental feature, set to 0 in final distribution
# Note that this failed with mini example (or other files, where packages used in latexdiff preamble
# are called again with incompatible options in preamble of resulting file)
$earlylatexdiffpreamble=0;


# define character properties
# (user-defined properties, used as \p{IsNonAsciiPunct} and \p{IsNonAsciiS} in $punct below)
sub IsNonAsciiPunct { return <<'END'    # Unicode punctuation but excluding ASCII punctuation
+utf8::IsPunct
-utf8::IsASCII
END
}
sub IsNonAsciiS { return <<'END'    # Unicode symbol but excluding ASCII
+utf8::IsS
-utf8::IsASCII
END
}


my %verbhash;

Getopt::Long::Configure('bundling');
GetOptions('type|t=s' => \$type,
           'subtype|s=s' => \$subtype,
           'floattype|f=s' => \$floattype,
           'config|c=s' => \@configlist,
           'add-to-config=s' => \@addtoconfiglist,
           'preamble|p=s' => \$preamblefile,
           'encoding|e=s' => \$encoding,
           'label|L=s' => \@labels,
           'no-label' => \$nolabel,
           'visible-label' => \$visiblelabel,
           'exclude-safecmd|A=s' => \@excludesafelist,
           'replace-safecmd=s' => \$replacesafe,
           'append-safecmd|a=s' => \@appendsafelist,
           'exclude-textcmd|X=s' => \@excludetextlist,
           'replace-textcmd=s' => \$replacetext,
           'append-textcmd|x=s' => \@appendtextlist,
           'replace-context1cmd=s' => \$replacecontext1,
           'append-context1cmd=s' => \@appendcontext1list,
           'replace-context2cmd=s' => \$replacecontext2,
           'append-context2cmd=s' => \@appendcontext2list,
           'exclude-mboxsafecmd=s' => \@excludemboxsafelist,
           'append-mboxsafecmd=s' => \@appendmboxsafelist,
           'show-preamble' => \$showpreamble,
           'show-safecmd' => \$showsafe,
           'show-textcmd' => \$showtext,
           'show-config' => \$showconfig,
           'show-all' => \$showall,
           'packages=s' => \@packagelist,
           'allow-spaces' => \$allowspaces,
           'math-markup=s' => \$mathmarkup,
           'graphics-markup=s' => \$graphicsmarkup,
           'enable-citation-markup|enforce-auto-mbox' => \$enablecitmark,
           'disable-citation-markup|disable-auto-mbox' => \$disablecitmark,
           'verbose|V' => \$verbose,
           'ignore-warnings' => \$ignorewarnings,
           'driver=s'=> \$driver,
           'flatten' => \$flatten,
           'filter-script=s' => \$filterscript,
           'ignore-filter-stderr' => \$ignorefilterstderr,
           'no-links' => \$nolinks,
           'version' => \$version,
           'help|h' => \$help,
           'debug!' => \$debug ) or die "Use latexdiff -h to get help.\n" ;

if ( $help ) {
  usage() ;
}


if ( $version ) {
  die $versionstring ;
}

print STDERR $versionstring if $verbose;

if (defined($showall)){
  $showpreamble=$showsafe=$showtext=$showconfig=1;
}
# Default types
$type='UNDERLINE' unless defined($type);
$subtype='SAFE' unless defined($subtype);
# set floattype to IDENTICAL for LABEL and ONLYCHANGEDPAGE subtype, unless it has been set explicitly on the command line
$floattype=($subtype eq 'LABEL' || $subtype eq 'ONLYCHANGEDPAGE') ? 'IDENTICAL' : 'FLOATSAFE' unless defined($floattype);
if ( $subtype eq 'LABEL' ) {
  # terminate the notice with a newline so that it does not run into later STDERR output
  print STDERR "Note that LABEL subtype is deprecated. If possible, use ZLABEL instead (requires zref package)\n";
}

# translate symbolic --math-markup values to the numerical constants; plain digits 0-3 are used as given
if (defined($mathmarkup)) {
  $mathmarkup=~tr/a-z/A-Z/;
  if ( $mathmarkup eq 'OFF' ){
    $mathmarkup=OFF;
  } elsif ( $mathmarkup eq 'WHOLE' ){
    $mathmarkup=WHOLE;
  } elsif ( $mathmarkup eq 'COARSE' ){
    $mathmarkup=COARSE;
  } elsif ( $mathmarkup eq 'FINE' ){
    $mathmarkup=FINE;
  } elsif ( $mathmarkup !~ m/^[0123]$/ ) {
    die "latexdiff Illegal value: ($mathmarkup) for option --math-markup. Possible values: OFF,WHOLE,COARSE,FINE,0-3\n";
  }
  # else use numerical value
}

# Give filterscript a default empty string
$filterscript="" unless defined($filterscript);

# setting extra preamble commands
if (defined($preamblefile)) {
  $latexdiffpreamble=join "\n",(extrapream($preamblefile),"");
} else {
  $latexdiffpreamble=join "\n",(extrapream($type,$subtype,$floattype),"");
}

if ( defined($driver) ) {
  # for changebar only
  $latexdiffpreamble=~s/\[dvips\]/[$driver]/sg;
}
# setting up @SAFECMDLIST and @SAFECMDEXCL
if (defined($replacesafe)) {
  init_regex_arr_ext(\@SAFECMDLIST,$replacesafe);
} else {
  init_regex_arr_data(\@SAFECMDLIST, "SAFE COMMANDS");
}
foreach $appendsafe ( @appendsafelist ) {
  init_regex_arr_ext(\@SAFECMDLIST, $appendsafe);
}
foreach $excludesafe ( @excludesafelist ) {
  init_regex_arr_ext(\@SAFECMDEXCL, $excludesafe);
}
# setting up @MBOXCMDLIST and @MBOXCMDEXCL
foreach $mboxsafe ( @appendmboxsafelist ) {
  init_regex_arr_ext(\@MBOXCMDLIST, $mboxsafe);
}
foreach $mboxsafe ( @excludemboxsafelist ) {
  init_regex_arr_ext(\@MBOXCMDEXCL, $mboxsafe);
}



# setting up @TEXTCMDLIST and @TEXTCMDEXCL
if (defined($replacetext)) {
  init_regex_arr_ext(\@TEXTCMDLIST,$replacetext);
} else {
  init_regex_arr_data(\@TEXTCMDLIST, "TEXT COMMANDS");
}
foreach $appendtext ( @appendtextlist ) {
  init_regex_arr_ext(\@TEXTCMDLIST, $appendtext);
}
foreach $excludetext ( @excludetextlist ) {
  init_regex_arr_ext(\@TEXTCMDEXCL, $excludetext);
}


# setting up @CONTEXT1CMDLIST ( @CONTEXT1CMDEXCL exist but is always empty )
if (defined($replacecontext1)) {
  init_regex_arr_ext(\@CONTEXT1CMDLIST,$replacecontext1);
} else {
  init_regex_arr_data(\@CONTEXT1CMDLIST, "CONTEXT1 COMMANDS");
}
foreach $appendcontext1 ( @appendcontext1list ) {
  init_regex_arr_ext(\@CONTEXT1CMDLIST, $appendcontext1);
}


# setting up @CONTEXT2CMDLIST ( @CONTEXT2CMDEXCL exist but is always empty )
if (defined($replacecontext2)) {
  init_regex_arr_ext(\@CONTEXT2CMDLIST,$replacecontext2);
} else {
  init_regex_arr_data(\@CONTEXT2CMDLIST, "CONTEXT2 COMMANDS");
}
foreach $appendcontext2 ( @appendcontext2list ) {
  init_regex_arr_ext(\@CONTEXT2CMDLIST, $appendcontext2);
}

# Expand one --config / --add-to-config argument into a list of "VARIABLE=value" strings.
#   $source      : option argument; either the name of an existing plain file (or /dev/null)
#                  or a comma-separated list of assignments
#   $description : word used for the file in the error message
# For files, empty lines and lines starting with # or % are skipped.
# Dies if the file exists but cannot be opened.
my $read_config_source = sub {
  my ($source,$description)=@_;
  # not a file: the argument itself is the list of assignments
  return split(",",$source) unless ( -f $source || lc $source eq '/dev/null' );
  # three-argument open with a lexical handle: the file name can never be taken as an open mode
  open(my $fh,'<',$source) or die ("Couldn't open $description file $source: $!");
  my @entries=();
  while ( my $entry=<$fh> ) {
    chomp $entry;
    next if $entry =~ /^\s*#/ || $entry =~ /^\s*%/ || $entry =~ /^\s*$/ ;
    push (@entries,$entry);
  }
  close($fh);
  return @entries;
};

# setting configuration variables
@config=();
foreach $config ( @configlist ) {
  push @config,$read_config_source->($config,'configuration');
}
foreach $assign ( @config ) {
  $assign=~ m/\s*(\w*)\s*=\s*(\S*)\s*$/ or die "Illegal assignment $assign in configuration list (must be variable=value)";
  exists $CONFIG{$1} or die "Unknown configuration variable $1.";
  $CONFIG{$1}=$2;
}

my @addtoconfig=();
foreach $config ( @addtoconfiglist ) {
  push @addtoconfig,$read_config_source->($config,'add-to-config');
}

# initialise default lists from DATA
# for those configuration variables, which have not been set explicitly, initiate from list in document
foreach $key ( keys(%CONFIG) ) {
  if (!defined $CONFIG{$key}) {
    @dumlist=();
    init_regex_arr_data(\@dumlist,"$key CONFIG");
    $CONFIG{$key}=join(";",@dumlist)
  }
}


# --add-to-config entries are appended (";"-separated) to whatever value is in place by now
foreach $assign ( @addtoconfig ) {
  ###print STDERR "assign:|$assign|\n";
  $assign=~ m/\s*(\w*)\s*=\s*(\S*)\s*$/ or die "Illegal assignment $assign in configuration list (must be variable=value)";
  exists $CONFIG{$1} or die "Unknown configuration variable $1.";
  $CONFIG{$1}.=";$2";
}

# Map from hash to variables (we do this to have more concise code later, change from comma-separated list)
foreach ( keys(%CONFIG) ) {
  if ( $_ eq "MINWORDSBLOCK" ) { $MINWORDSBLOCK = $CONFIG{$_}; }
  elsif ( $_ eq "FLOATENV" ) { $FLOATENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "ITEMCMD" ) { $ITEMCMD = $CONFIG{$_} ; }
  elsif ( $_ eq "LISTENV" ) { $LISTENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "PICTUREENV" ) { $PICTUREENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "MATHENV" ) { $MATHENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "MATHREPL" ) { $MATHREPL = $CONFIG{$_} ; }
  elsif ( $_ eq "MATHARRENV" ) { $MATHARRENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "MATHARRREPL" ) { $MATHARRREPL = $CONFIG{$_} ; }
  elsif ( $_ eq "ARRENV" ) { $ARRENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "VERBATIMENV" ) { $VERBATIMENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "VERBATIMLINEENV" ) { $VERBATIMLINEENV = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "CUSTOMDIFCMD" ) { $CUSTOMDIFCMD = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "COUNTERCMD" ) { $COUNTERCMD = liststringtoregex($CONFIG{$_}) ; }
  elsif ( $_ eq "SCALEDELGRAPHICS" ) { $SCALEDELGRAPHICS = $CONFIG{$_} ; }
  else { die "Unknown configuration variable $_.";}
}

if ( $mathmarkup == COARSE || $mathmarkup == WHOLE ) {
  push(@MATHTEXTCMDLIST,qr/^MATHBLOCK(?:$MATHENV|$MATHARRENV|SQUAREBRACKET)$/);
}





foreach $pkg ( @packagelist ) {
  map { $packages{$_}="" } split(/,/,$pkg) ;
}


if ($showconfig || $showtext || $showsafe || $showpreamble) {

  show_configuration();
  exit 0;
}


if ( @ARGV != 2 ) {
  print STDERR "2 and only 2 non-option arguments required. Write latexdiff -h to get help\n";
  exit(2);
}

# Are extra spaces between command arguments permissible?
my $extraspace;
if ($allowspaces) {
  $extraspace='\s*';
} else {
  $extraspace='';
}

# append context lists to text lists (as text property is implied)
push @TEXTCMDLIST, @CONTEXT1CMDLIST;
push @TEXTCMDLIST, @CONTEXT2CMDLIST;

push @TEXTCMDLIST, @MATHTEXTCMDLIST if $mathmarkup==COARSE;

# internal additions to SAFECMDLIST
push(@SAFECMDLIST, qr/^QLEFTBRACE$/, qr/^QRIGHTBRACE$/);


# Patterns. These are used by some of the subroutines, too
# I can only define them down here because value of extraspace depends on an option

  # $pat_n: content of a {...} group; each pass of the loop wraps the pattern once more and so
  # allows one more level of nested braces
  my $pat0 = '(?:[^{}])*';
  my $pat_n = $pat0;
# if you get "undefined control sequence MATHBLOCKmath" error, increase the maximum value in this loop
  for (my $i_pat = 0; $i_pat < 20; ++$i_pat){
    $pat_n = '(?:[^{}]|\{'.$pat_n.'\}|\\\\\{|\\\\\})*';
    # Actually within the text body, quoted braces are replaced in pre-processing. The only place where
    # the last part of the pattern matters is when processing the arguments of context2cmds in the preamble
    # and these contain a \{ or \} combination, probably rare.
    # It should thus be fine to use the simpler version below.
    ### $pat_n = '(?:[^{}]|\{'.$pat_n.'\})*';
  }

  # $brat_n: the same construction for [...] groups
  my $brat0 = '(?:[^\[\]]|\\\[|\\\])*';
  my $brat_n = $brat0;
  for (my $i_pat = 0; $i_pat < 4; ++$i_pat){
    $brat_n = '(?:[^\[\]]|\['.$brat_n.'\]|\\\[|\\\])*';
    ### $brat_n = '(?:[^\[\]]|\['.$brat_n.'\])*';   # Version not taking into account escaped \[ and \]
  }
  my $abrat0 = '(?:[^<>])*';

  my $quotemarks = '(?:\'\')|(?:\`\`)';
  my $punct='[0.,\/\'\`:;\"\?\(\)\[\]!~\p{IsNonAsciiPunct}\p{IsNonAsciiS}]';
  my $number='-?\d*\.\d*';
  my $mathpunct='[+=<>\-\|]';
  my $and = '&';
  my $coords= '[\-.,\s\d]*';
# quoted underscore - this needs special treatment as perl treats _ as a letter (\w) but latex does not
# such that a\_b is interpreted as a{\_}b by latex but a{\_b} by perl
  my $quotedunderscore='\\\\_';
# word: sequence of letters or accents followed by letter
  my $word_ja='\p{Han}+|\p{InHiragana}+|\p{InKatakana}+';
  my $word='(?:' . $word_ja . '|(?:(?:[-\w\d*]|\\\\[\"\'\`~^][A-Za-z\*])(?!(?:' . $word_ja . ')))+)';

  # for selected commands, the number of arguments is known, and we can therefore allow spaces between command and its argument
  # Note that it is still expected that the arguments are blocks marked by parentheses rather than single characters, and that intervening comments will inhibit the association
  my $predefinedcmdoptseq01='\\\\(?:url|BibitemShut)\s*\s*(?:\{'. $pat_n . '\}\s*){1}';   # Commands with one non-optional argument
  my $predefinedcmdoptseq12='\\\\(?:href|bibfield|bibinfo)\s*(?:\['.$brat_n.'\])?\s*(?:\{'. $pat_n . '\}\s*){2}';   # Commands with one optional and two non-optional arguments
#  my $predefinedcmdoptseq11='\\\\(?:bibitem)\s*(?:\['.$brat_n.'\])?\s*(?:\{'. $pat_n . '\}\s*){1}';   # Commands with one optional and one non-optional arguments
# \bibitem in revtex styles appears to be always followed by \BibItemOpen. We bind \BibItemOpen to the bibitem (if present) in order to prevent the comparison algorithm to interpret the \BibItemOpen as an identical part of the sequence; this interpretation can lead to added and removed entries to the reference list to become mixed.
  my $predefinedbibitem='\\\\(?:bibitem)\s*(?:\['.$brat_n.'\])?\s*(?:\{'. $pat_n . '\})(?:%?\s*\\\\BibitemOpen)?';   # Commands with one optional and one non-optional arguments

  my $predefinedcmdoptseq='(?:'.$predefinedcmdoptseq12.'|'.$predefinedcmdoptseq01.'|'.$predefinedbibitem.')';

  my $cmdleftright='\\\\(?:left|right|[Bb]igg?[lrm]?|middle)\s*(?:[<>()\[\]|\.]|\\\\(?:[|{}]|\w+))';
  my $cmdoptseq='\\\\[\w\d@\*]+'.$extraspace.'(?:(?:<'.$abrat0.'>|\['.$brat_n.'\]|\{'. $pat_n . '\}|\(' . $coords .'\))'.$extraspace.')*';
  my $defseq='\\\\def\\\\[\w\d@\*]+(?:#\d+|\[#\d+\])+(?:\{'. $pat_n . '\})?';
  my $backslashnl='\\\\\n';
  my $oneletcmd='\\\\.\*?(?:\['.$brat_n.'\]|\{'. $pat_n . '\})*';
  my $math='\$(?:[^$]|\\\$)*?\$|\\\\[(](?:.|\n)*?\\\\[)]';
## the current maths command cannot cope with newline within the math expression
  my $comment='%[^\n]*\n';
  # pattern for one token: any of the alternatives above, together with the whitespace following it
  my $pat=qr/(?:\A\s*)?(?:${and}|${quotemarks}|${number}|${word}|$quotedunderscore|${defseq}|$cmdleftright|${predefinedcmdoptseq}|${cmdoptseq}|${math}|${backslashnl}|${oneletcmd}|${comment}|${punct}|${mathpunct}|\{|\})\s*/ ;




# now we are done setting up and can start working
my ($oldfile, $newfile) = @ARGV;
# check for existence of input files
if ( ! -e $oldfile ) {
  die "Input file $oldfile does not exist";
}
if ( ! -e $newfile ) {
  die "Input file $newfile does not exist";
}


# set the labels to be included into the file
# first find out which file name is longer for correct alignment
my ($diff,$oldlabel_n_spaces,$newlabel_n_spaces);
$oldlabel_n_spaces = 0;
$newlabel_n_spaces = 0;
$diff = length($newfile) - length($oldfile);
if ($diff > 0) {
  $oldlabel_n_spaces = $diff;
}
if ($diff < 0) {
  $newlabel_n_spaces = abs($diff);
}

# default label: file name, padding, modification time of the file
my ($oldtime,$newtime,$oldlabel,$newlabel);
if (defined($labels[0])) {
  $oldlabel=$labels[0] ;
} else {
  $oldtime=localtime((stat($oldfile))[9]);
  $oldlabel="$oldfile " . " "x($oldlabel_n_spaces) . $oldtime;
}
if (defined($labels[1])) {
  $newlabel=$labels[1] ;
} else {
  $newtime=localtime((stat($newfile))[9]);
  $newlabel="$newfile " . " "x($newlabel_n_spaces) . $newtime;
}

$encoding=guess_encoding($newfile) unless defined($encoding);

$encoding = "utf8" if $encoding =~ m/^utf8/i ;
if (lc($encoding) eq "utf8" ) {
  binmode(STDOUT, ":utf8");
  binmode(STDERR, ":utf8");
}

# filter($text)
# Runs $text through the script provided in $filterscript argument, if set
# If not set, just returns $text unchanged.
# If flatten was set, defer filtering to flatten. flatten will run the filter
# on all incoming text prior to its own processing.
# If flatten was not set, filter each of old and new once (see just below this def)
#
# Returns the standard output of the filter script as one string; this is the
# empty string (never undef) if the script prints nothing.
# With --ignore-filter-stderr the standard error of the script is captured and dropped.
# NOTE(review): the whole text is written to the script before any of its output is
# read, and the captured STDERR is never read - a script producing a lot of output
# before it has consumed its input could presumably block; confirm if large inputs matter.
sub filter {
  my ($text)=@_;
  return $text if $filterscript eq "";

  # start from an empty string: no "uninitialized value" warning on the first
  # concatenation and a defined result even if the script is silent
  my $textout="";
  my $pid;
  print STDERR "Passing " . length($text) . " chars to filter script " . $filterscript . "\n" if $verbose;

  if ($ignorefilterstderr) {
    # If we need to capture and bury STDERR, use the Open3 version, and close CHLD_ERR below.
    use IPC::Open3;
    # We consume STDERR from the process, and hide it
    $pid = open3(\*CHLD_IN, \*CHLD_OUT, \*CHLD_ERR, $filterscript) or die "open3() failed $!";
  }
  else {
    # Capture STDOUT and use as our new $text. Allow STDERR to go to console.
    use IPC::Open2;
    $pid = open2(\*CHLD_OUT, \*CHLD_IN, $filterscript) or die "open2() failed $!";
  }
  # Send in $text
  print CHLD_IN $text."\n"; # Adding a newline just to make sure there is one.
  close CHLD_IN;
  # Wait for output and gather it up
  while (<CHLD_OUT>) {
    $textout .= $_;
  }
  if ($ignorefilterstderr) {
    close CHLD_ERR; # Enable only if Open3 used above
  }
  # On the off chance a very long running and/or frequently called script is used.
  waitpid( $pid, 0 );
  $text = $textout;
  print STDERR "Received " . length($text) . " chars after filtering\n" if $verbose;
  print STDERR $text if $verbose;
  return $text;
}

$old=read_file_with_encoding($oldfile,$encoding);
$new=read_file_with_encoding($newfile,$encoding);

if (not defined($flatten)) {
  $old=filter($old);
  $new=filter($new);
}




# reset time
exetime(1);
($oldpreamble,$oldbody,$oldpost)=splitdoc($old,'\\\\begin\{document\}','\\\\end\{document\}');

($newpreamble,$newbody,$newpost)=splitdoc($new,'\\\\begin\{document\}','\\\\end\{document\}');


if ($flatten) {
  $oldbody=flatten($oldbody,$oldpreamble,File::Spec->rel2abs($oldfile),$encoding);
  $newbody=flatten($newbody,$newpreamble,File::Spec->rel2abs($newfile),$encoding);
  # flatten preamble
  $oldpreamble=flatten($oldpreamble,$oldpreamble,File::Spec->rel2abs($oldfile),$encoding);
  $newpreamble=flatten($newpreamble,$newpreamble,File::Spec->rel2abs($newfile),$encoding);

}




my @auxlines;

# boolean variable
my ($ulem)=0;

if ( length $oldpreamble && length $newpreamble ) {
  # pre-process preamble by looking for commands used in \maketitle (title, author, date etc commands)
  # and marking up content with latexdiff markup
  @auxlines=preprocess_preamble($oldpreamble,$newpreamble);

  @oldpreamble = split /\n/, $oldpreamble;
  @newpreamble = split /\n/, $newpreamble;

  # If a command is defined in the preamble of the new file, and only uses safe commands, then it can be considered to be safe) (contribution S. Gouezel)
  # Base this assessment on the new preamble
  add_safe_commands($newpreamble);

  # get a list of packages from preamble if not predefined
  %packages=list_packages($newpreamble) unless %packages;
  if ( %packages && $debug ) { my $key ; foreach $key (keys %packages) { print STDERR "DEBUG \\usepackage[",$packages{$key},"]{",$key,"}\n" ;} }
}

# have to return to all processing to properly add preamble additions based on packages found
if (defined($graphicsmarkup)) {
  $graphicsmarkup=~tr/a-z/A-Z/;
  if ( $graphicsmarkup eq 'OFF' or $graphicsmarkup eq 'NONE' ) {
    $graphicsmarkup=NONE;
  } elsif ( $graphicsmarkup eq 'NEWONLY' or $graphicsmarkup eq 'NEW-ONLY' ) {
    $graphicsmarkup=NEWONLY;
  } elsif ( $graphicsmarkup eq 'BOTH' ) {
    $graphicsmarkup=BOTH;
  } elsif ( $graphicsmarkup !~ m/^[012]$/ ) {
    # name the option and the values that are actually accepted above
    die "latexdiff Illegal value: ($graphicsmarkup) for option --graphics-markup. Possible values: OFF,NONE,NEWONLY,NEW-ONLY,BOTH,0-2\n";
  }
  # else use numerical value
} else {
  # Default: no explicit setting in menu
  if ( defined $packages{"graphicx"} or defined $packages{"graphics"} ) {
    $graphicsmarkup=NEWONLY;
  } else {
    $graphicsmarkup=NONE;
  }
}

if (defined $packages{"hyperref"} ) {
  # deleted lines should not generate or appear in link names:
  print STDERR "hyperref package detected.\n" if $verbose ;
  $latexdiffpreamble =~ s/\{\\DIFadd\}/{\\DIFaddtex}/g;
  $latexdiffpreamble =~ s/\{\\DIFdel\}/{\\DIFdeltex}/g;
  $latexdiffpreamble .= join "\n",(extrapream("HYPERREF"),"");
  if($nolinks){
    $latexdiffpreamble .= "\n\\hypersetup{bookmarks=false}";
  }
  ###    $latexdiffpreamble .= '%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF FOR HYPERREF PACKAGE' . "\n";
  ###    $latexdiffpreamble .= '\providecommand{\DIFadd}[1]{\texorpdfstring{\DIFaddtex{#1}}{#1}}' . "\n";
  ###    $latexdiffpreamble .= '\providecommand{\DIFdel}[1]{\texorpdfstring{\DIFdeltex{#1}}{}}' . "\n";
  ###    $latexdiffpreamble .= '%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF FOR HYPERREF PACKAGE' . "\n";
}

# add commands for figure highlighting to preamble
if ($graphicsmarkup != NONE ) {
  my @matches;
  # Check if \DIFaddbeginFL definition calls \DIFaddbegin - if so we will issue an error message that graphics highlighting is
  # is not compatible with this.
  # (A more elegant solution would be to suppress the redefinitions of the \DIFaddbeginFL etc commands, but for this narrow use case
  #  I currently don't see this as an efficient use of time)
  ### The foreach loop does not make sense here. I don't know why I put this in -  (F Tilmann)
  ###foreach my $cmd ( "DIFaddbegin","DIFaddend","DIFdelbegin","DIFdelend" ) {
  @matches=( $latexdiffpreamble =~ m/command\{\\DIFaddbeginFL}\{($pat_n)}/sg ) ;
  # we look at the last one of the list to take into account possible redefinition but almost always matches should have exactly one element
  # (nothing to check if the preamble does not define \DIFaddbeginFL at all)
  if ( @matches && $matches[$#matches] =~ m/\\DIFaddbegin/ ) {
    die "Cannot combine graphics markup with float styles defining \\DIFaddbeginFL in terms of \\DIFaddbegin. Use --graphics-markup=none option or choose a different float style.";
  }
  ###}
  $latexdiffpreamble .= join "\n",("\\newcommand{\\DIFscaledelfig}{$SCALEDELGRAPHICS}",extrapream("HIGHLIGHTGRAPHICS"),"");

  # only change required for highlighting both is to declare \includegraphics safe, as preamble already contains commands for deleted environment
  if ( $graphicsmarkup == BOTH ) {
    init_regex_arr_list(\@SAFECMDLIST,'includegraphics');
  }
}

$ulem = ($latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{ulem\}/ || defined $packages{"ulem"});


# If listings is being used or can be found in the latexdiff search path, add to the preamble auxiliary code to enable line-by-line markup
if ( defined($packages{"listings"}) or `kpsewhich listings.sty` ne "" ) {
  my @listingpreamble=extrapream("LISTINGS");
  my @listingDIFcode=();
  my $replaced;
  # note that in case user supplies preamblefile the type might not reflect well the
  @listingDIFcode=extrapream("-nofail","DIFCODE_" . $type) unless defined($preamblefile);
  if (!(@listingDIFcode)) {
    # if listingDIFcode is empty try to guess a suitable one from the preamble
    if ($latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{color\}/ and $ulem ) {
      @listingDIFcode=extrapream("DIFCODE_UNDERLINE");
    } elsif ( $latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{color\}/ ) {
      # only colour used
      @listingDIFcode=extrapream("DIFCODE_CFONT");
    } else {
      # fall-back solution
      @listingDIFcode=extrapream("DIFCODE_BOLD");
    }
  }
  # now splice it in
  $replaced=0;
  ###print STDERR "DEBUG: listingDIFcode: ",join("\n",@listingDIFcode),"|||\n" if $debug;

  @listingpreamble=grep {
    # only replace if this has not been done already (use short-circuit property of and)
    if (!$replaced and $_ =~ s/^.*%DIFCODE TEMPLATE.*$/join("\n",@listingDIFcode)/e ) {
      ###print STDERR "DEBUG: Replaced text $_\n" if $debug;
      $replaced=1;
      1;
    } else {
      # return false for those lines matching %DIFCODE TEMPLATE (so that they are not included in output)
      not m/%DIFCODE TEMPLATE/;
    }
  } @listingpreamble;
  ###  print STDERR "DEBUG: listingpreamble @listingpreamble\n";
  $latexdiffpreamble .= join "\n",(@listingpreamble,"");
} else {
  print STDERR "WARNING: listings package not detected. Disabling mark-up in verbatim environments \n" ;
  # if listings does not exist disable line-by-line markup and treat all verbatim environments as opaque
  $VERBATIMENV = liststringtoregex($CONFIG{VERBATIMENV}.";".$CONFIG{VERBATIMLINEENV});
  $VERBATIMLINEENV = "";
}

# adding begin and end marker lines to preamble
$latexdiffpreamble = "%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF\n" . $ latexdiffpreamble . 
"%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF\n"; 1559 1560# and return to preamble specific processing 1561if ( length $oldpreamble && length $newpreamble ) { 1562 print STDERR "Differencing preamble.\n" if $verbose; 1563 1564 # insert dummy first line such that line count begins with line 1 (rather than perl's line 0) - just so that line numbers inserted by linediff are correct 1565 unshift @newpreamble,''; 1566 unshift @oldpreamble,''; 1567 @diffpreamble = linediff(\@oldpreamble, \@newpreamble); 1568 # remove dummy line again 1569 shift @diffpreamble; 1570 # add filenames, modification time and latexdiff mark 1571 defined($nolabel) or splice @diffpreamble,1,0, 1572 "%DIF LATEXDIFF DIFFERENCE FILE", 1573 ,"%DIF DEL $oldlabel", 1574 "%DIF ADD $newlabel"; 1575 if ( @auxlines ) { 1576 push @diffpreamble,"%DIF DELETED TITLE COMMANDS FOR MARKUP"; 1577 push @diffpreamble,join("\n",@auxlines); 1578 } 1579 if ( $earlylatexdiffpreamble) { 1580 # insert latexdiff command directly after documentclass at beginning of preamble 1581 # note that grep is only run for its side effect 1582 ( grep { s/^([^%]*\\documentclass.*)$/$1$latexdiffpreamble/ } @diffpreamble )==1 or die "Could not find documentclass statement in preamble"; 1583 } else { 1584 # insert latexdiff commands at the end of preamble (default behaviour) 1585 push @diffpreamble,$latexdiffpreamble; 1586 } 1587 push @diffpreamble,'\begin{document}'; 1588 if (defined $packages{"hyperref"} && $nolinks) { 1589 push @diffpreamble, '\begin{NoHyper}'; 1590 } 1591} 1592elsif ( !length $oldpreamble && !length $newpreamble ) { 1593 @diffpreamble=(); 1594} else { 1595 print STDERR "Either both texts must have preamble or neither text must have the preamble.\n"; 1596 exit(2); 1597} 1598 1599# Special: treat all cite commands as safe except in UNDERLINE and FONTSTRIKE mode 1600# (there is a conflict between citation and ulem package, see 1601# package documentation) 1602# Use post-processing 1603# and 
$packages{"apacite"}!~/natbibpapa/ 1604 1605 1606if (defined $packages{"units"} && $ulem ) { 1607 # protect inlined maths environments by surrounding with an \mbox 1608 # this is done to get around an incompatibility between the ulem and units package 1609 # where spaces in the argument to underlined or crossed-out \unit commands cause an error message 1610 print STDERR "units package detected at the same time as style using ulem.\n" if $verbose ; 1611 $MBOXINLINEMATH=1; 1612} 1613 1614if (defined $packages{"siunitx"} ) { 1615 # protect SI command by surrounding them with an \mbox 1616 # this is done to get around an incompatibility between the ulem and siunitx package 1617 print STDERR "siunitx package detected.\n" if $verbose ; 1618 my $mboxcmds='SI,ang,numlist,numrange,SIlist,SIrange'; 1619 init_regex_arr_list(\@SAFECMDLIST,'num,si'); 1620 if ( $enablecitmark || ( $ulem && ! $disablecitmark )) { 1621 init_regex_arr_list(\@MBOXCMDLIST,$mboxcmds); 1622 } else { 1623 init_regex_arr_list(\@SAFECMDLIST,$mboxcmds); 1624 } 1625} 1626 1627if (defined $packages{"cleveref"} ) { 1628 # protect selected command by surrounding them with an \mbox 1629 # this is done to get around an incompatibility between ulem and cleveref package 1630 print STDERR "cleveref package detected.\n" if $verbose ; 1631 my $mboxcmds='[Cc]ref(?:range)?\*?,labelcref,(?:lc)?name[cC]refs?' ; 1632 if ( $enablecitmark || ( $ulem && ! 
$disablecitmark )) { 1633 init_regex_arr_list(\@MBOXCMDLIST,$mboxcmds); 1634 } else { 1635 init_regex_arr_list(\@SAFECMDLIST,$mboxcmds); 1636 } 1637} 1638 1639if (defined $packages{"glossaries"} ) { 1640 # protect selected command by surrounding them with an \mbox 1641 # this is done to get around an incompatibility between ulem and glossaries package 1642 print STDERR "glossaries package detected.\n" if $verbose ; 1643 my $mboxcmds='[gG][lL][sS](?:|pl|disp|link|first|firstplural|desc|user[iv][iv]?[iv]?),[aA][cC][rR](?:long|longpl|full|fullpl),[aA][cC][lfp]?[lfp]?'; 1644 init_regex_arr_list(\@SAFECMDLIST,'[gG][lL][sS](?:(?:entry)?(?:text|plural|name|symbol)|displaynumberlist|entryfirst|entryfirstplural|entrydesc|entrydescplural|entrysymbolplural|entryuser[iv][iv]?[iv]?|entrynumberlist|entrydisplaynumberlist|entrylong|entrylongpl|entryshort|entryshortpl|entryfull|entryfullpl),[gG]lossentry(?:name|desc|symbol),[aA][cC][rR](?:short|shortpl),[aA]csp?'); 1645 if ( $enablecitmark || ( $ulem && ! 
$disablecitmark )) { 1646 init_regex_arr_list(\@MBOXCMDLIST,$mboxcmds); 1647 } else { 1648 init_regex_arr_list(\@SAFECMDLIST,$mboxcmds); 1649 } 1650} 1651 1652if (defined $packages{"chemformula"} or defined $packages{"chemmacros"} ) { 1653 print STDERR "chemformula package detected.\n" if $verbose ; 1654 init_regex_arr_list(\@SAFECMDLIST,'ch'); 1655 push(@UNSAFEMATHCMD,'ch'); 1656 # The next command would be needed to allow highlighting the interior of \ch commands in math environments 1657 # but the redefinitions in chemformula are too deep to make this viable 1658 # push(@MATHTEXTCMDLIST,'ch'); 1659} 1660 1661if (defined $packages{"mhchem"} ) { 1662 print STDERR "mhchem package detected.\n" if $verbose ; 1663 init_regex_arr_list(\@SAFECMDLIST,'ce'); 1664 push(@UNSAFEMATHCMD,'ce','cee'); 1665 # The next command would be needed to allow highlighting the interior of \cee commands in math environments 1666 # but the redefinitions in chemformula are too deep to make this viable 1667 # push(@MATHTEXTCMDLIST,'cee'); 1668} 1669 1670 1671my ( $citpat); 1672 1673if ( defined $packages{"apacite"} ) { 1674 print STDERR "apacite package detected.\n" if $verbose ; 1675 $citpat='(?:mask)?(?:full|short|no)?cite(?:A|author|year|meta)?(?:NP)?'; 1676} elsif (defined $packages{"biblatex"}) { 1677 print STDERR "biblatex package detected.\n" if $verbose ; 1678 $citpat='(?:[cC]ites?|(?:[pP]aren|foot|[Tt]ext|[sS]mart|super)cites?\*?|footnotecitetex)'; 1679} else { 1680 # citation command pattern for all other citation schemes 1681 $citpat='(?:cite\w*|nocite)'; 1682}; 1683 1684if ( ! 
$ulem ) { 1685 # modes not using ulem: citation is safe 1686 push (@SAFECMDLIST, $citpat); 1687} else { 1688 ### Experimental: disable text and emph commands 1689 push(@SAFECMDEXCL, qr/^emph$/, qr/^text..$/); 1690 # replace \cite{..} by \mbox{\cite{..}} in added or deleted blocks in post-processing 1691 push(@MBOXCMDLIST,$citpat) unless $disablecitmark; 1692 if ( uc($subtype) eq "COLOR" or uc($subtype) eq "DVIPSCOL" ) { 1693 # remove \cite command again from list of safe commands 1694 pop @MBOXCMDLIST; 1695 # deleted cite commands 1696 } 1697} 1698push(@MBOXCMDLIST,$citpat) if $enablecitmark ; 1699 1700 1701if (defined $packages{"amsmath"} or defined $packages{"amsart"} or defined $packages{"amsbook"} ) { 1702 print STDERR "amsmath package detected.\n" if $verbose ; 1703 $MATHARRREPL='align*'; 1704} 1705 1706# add commands in MBOXCMDLIST to SAFECMDLIST 1707foreach $mboxcmd ( @MBOXCMDLIST ) { 1708 init_regex_arr_list(\@SAFECMDLIST, $mboxcmd); 1709} 1710 1711# check if \label is in SAFECMDLIST, and if yes replace "label" in $LABELCMD by something that never matches (we hope!) 1712if ( iscmd("label",\@SAFECMDLIST,\@SAFECMDEXCL) ) { 1713 $LABELCMD=~ s/label/NEVERMATCHLABEL/; 1714} 1715 1716 1717 1718print STDERR "Preprocessing body. " if $verbose; 1719preprocess($oldbody,$newbody); 1720 1721 1722# run difference algorithm 1723@diffbody=bodydiff($oldbody, $newbody); 1724$diffbo=join("",@diffbody); 1725writedebugfile($diffbo,"bodydiff"); 1726 1727print STDERR "(",exetime()," s)\n","Postprocessing body. 
# liststringtoregex($liststring)
# Converts a semicolon-separated list of patterns into a single alternation.
# Entries that are empty or contain only whitespace are dropped.
# Returns the string '(?:(?:entry1)|(?:entry2)|...)', or "" if no usable entry is left.
sub liststringtoregex {
  my ($liststring)=@_;
  my @alternatives=grep { m/\S/ } split(/;/,$liststring);
  return "" unless @alternatives;
  # each entry gets its own non-capturing group; the complete alternation is grouped once more
  my $alternation=join('|', map { "(?:$_)" } @alternatives);
  return "(?:$alternation)";
}
## guess_encoding($filename)
## Scans the beginning of $filename for a line of the form \usepackage[<enc>]{inputenc}.
## Lines starting with % (after optional whitespace) are skipped and do not count;
## the scan stops once more than 20 other lines have been examined without a match.
## Returns <enc> (the option given to inputenc) if such a line is found, otherwise "utf8".
## Dies if the file cannot be opened.
sub guess_encoding {
  my ($filename)=@_;
  my $encoding="utf8";   # fall-back if no inputenc line is found
  my $examined=0;
  # 3-argument open on a lexical handle: characters such as > or | in the file name are
  # not interpreted as open modes, and no global handle is shared with other routines
  open (my $fh, "<", $filename) or die("Couldn't open $filename: $!");
  # read into a lexical variable so that the caller's $_ is left untouched
  while ( defined(my $line=<$fh>) ) {
    next if $line =~ /^\s*%/;    # skip comment lines
    if ( $line =~ m/\\usepackage\[(\w*?)\]\{inputenc\}/ ) {
      $encoding=$1;
      last;
    }
    last if ( ++$examined > 20 ); # give up after 20 non-comment lines
  }
  close($fh);
  return($encoding);
}
enable whole-file mode 1861 $output=<FILE>; 1862 print STDERR "Converting $filename from $encoding to utf8\n" if $verbose; 1863 $output=Encode::decode($encoding,$output); 1864 } 1865 close FILE; 1866 if ($^O eq "linux" ) { 1867 $output =~ s/\r\n/\n/g ; 1868 } 1869 return $output; 1870} 1871 1872## %packages=list_packages(@preamble) 1873## scans the arguments for \documentclass,\RequirePackage and \usepackage statements and constructs a hash 1874## whose keys are the included packages, and whose values are the associated optional arguments 1875#sub list_packages { 1876# my (@preamble)=@_; 1877# my %packages=(); 1878# foreach $line ( @preamble ) { 1879# # get rid of comments 1880# $line=~s/(?<!\\)%.*$// ; 1881# if ( $line =~ m/\\(?:documentclass|usepackage|RequirePackage)(?:\[(.+?)\])?\{(.*?)\}/ ) { 1882## print STDERR "Found something: |$line|\n" if $debug; 1883# if (defined($1)) { 1884# $packages{$2}=$1; 1885# } else { 1886# $packages{$2}=""; 1887# } 1888# } 1889# } 1890# return (%packages); 1891#} 1892 1893 1894# %packages=list_packages($preamble) 1895# scans the arguments for \documentclass,\RequirePackage and \usepackage statements and constructs a hash 1896# whose keys are the included packages, and whose values are the associated optional arguments 1897# if argument of \usepackage or \RequirePackage is comma separated list, treat as different packages 1898sub list_packages { 1899 my ($preamble)=@_; 1900 my %packages=(); 1901 my $pkg; 1902 1903 # remove comments 1904 $preamble=~s/(?<!\\)%.*$//mg ; 1905 1906 while ( $preamble =~ m/\\(?:documentclass|usepackage|RequirePackage)(?:\[($brat_n)\])?\{(.*?)\}/gs ) { 1907 if (defined($1)) { 1908 foreach $pkg ( split /,/,$2 ) { 1909 $packages{$pkg}=$1; 1910 } 1911 } else { 1912 foreach $pkg ( split /,/,$2 ) { 1913 $packages{$pkg}=""; 1914 } 1915 } 1916 } 1917 1918 # sometimes, class options are defined in such a way that they imply the loading and/or presence of a package 1919 # so we also treat all class options as 
# Subroutine add_safe_commands modified from version provided by S. Gouezel
# add_safe_commands($preamble)
# Scans $preamble (after stripping comments) for
#    \DeclareMathOperator{\foo}{...}      (optionally starred)
#    \newcommand{\foo}[n]{definition}     (also \renewcommand and \providecommand)
# and adds \foo to @SAFECMDLIST (via init_regex_arr_list) unless iscmd already reports
# it as a safe command or as matching @SAFECMDEXCL.
# A \newcommand-type command is only added if its definition contains no double backslash
# and every command used in the definition is itself safe according to iscmd.
# Nothing is returned; progress messages go to STDERR if $verbose or $debug are set.
sub add_safe_commands {
  my ($preamble)=@_;
  my ($newcmd,$definition,$usedcmd,$issafe);

  # get rid of comments
  $preamble=~s/(?<!\\)%.*$//mg ;

  # test for \DeclareMathOperator{\foo}{myoperator}
  # (the leading backslash has to be doubled in the pattern; a single one turns \D into
  #  the non-digit character class, which also matches texts other than the command)
  while ( $preamble =~ m/\\DeclareMathOperator\s*\*?\{\\(\w*?)\}/sg) {
    $newcmd=$1;
    if ($newcmd ne "" and not iscmd($newcmd,\@SAFECMDLIST,\@SAFECMDEXCL) and not iscmd($newcmd, \@SAFECMDEXCL, [])) {
      init_regex_arr_list(\@SAFECMDLIST, $newcmd);
      print STDERR "Adding $newcmd to the list of safe commands\n" if $verbose;
    }
  }

  # test for \newcommand{\foo}[n]{definition}
  # (all literal braces are escaped; perl flags an unescaped { in a pattern with a warning
  #  or an error, depending on the perl release)
  while ( $preamble =~ m/\\(?:new|renew|provide)command\s*\{\\(\w*)\}(?:|\[\d*\])\s*\{(${pat_n})\}/osg ) {
    $newcmd=$1;
    $definition=$2;
    print STDERR "DEBUG Checking new command: maybe_to_test, should_be_safe: $newcmd $definition\n" if $debug;
    # test if all latex commands inside the definition are safe
    $issafe=1;
    if ($definition =~ m/\\\\/) {
      # a definition containing a double backslash is rejected outright
      $issafe=0;
    } else {
      while ($definition =~ m/\\(\w+)/g) {
        # copy the capture before handing it on, so that pattern matches inside
        # iscmd cannot change the value under test
        $usedcmd=$1;
        $issafe=0 unless iscmd($usedcmd,\@SAFECMDLIST,\@SAFECMDEXCL);
      }
    }
    if ( $issafe and not iscmd($newcmd,\@SAFECMDLIST,\@SAFECMDEXCL) and not iscmd($newcmd, \@SAFECMDEXCL, [])) {
      init_regex_arr_list(\@SAFECMDLIST, $newcmd);
      print STDERR "Adding $newcmd to the list of safe commands\n" if $verbose;
    }
  }
}
# remove_endinput($text)
# helper function for flatten
# Cuts $text at the first \endinput that stands at the very beginning of a line:
# the \endinput and everything after it are deleted.
# An \endinput elsewhere in a line is left alone (it is then assumed to be part of a
# conditional clause which is never actually reached).
# The argument is modified in place; the resulting text is also returned.
sub remove_endinput {
  # go through a reference to the first argument so that the caller's variable is edited
  my $textref=\$_[0];
  # /m lets ^ match at every line start, /s lets .* run across newlines up to the end
  ${$textref} =~ s/^\\endinput.*\Z//ms ;
  return ${$textref};
}
$1 : ""); 2031 $subdir = $3 if defined($3); 2032 $subdir = $4 if defined($4); 2033 $fname = $5; 2034 $fname .= ".tex" unless $fname =~ m|\.\w{3,4}$|; 2035 print STDERR "DEBUG begline:", $begline, "\n" if $debug; 2036 print STDERR "DEBUG", (defined($3)? "subimport_file:" : "import_file:"), $subdir, "\n" if $debug; 2037 print STDERR "DEBUG file:", $fname, "\n" if $debug; 2038 2039 # subimport appends $subdir to the current $dirname. import replaces it with an absolute path. 2040 $subdirfull = (defined($3) ? File::Spec->catdir($dirname,$subdir) : $subdir); 2041 2042 $importfilepath = File::Spec->catfile($subdirfull, $fname); 2043 2044 print STDERR "importing importfilepath:", $importfilepath,"\n" if $verbose; 2045 if ( -f $importfilepath ) { 2046 # If file exists, replace input or include command with expanded input 2047 $replacement=flatten(read_file_with_encoding($importfilepath, $encoding), $preamble,$importfilepath,$encoding) or die "Could not open file ",$fullfile,": $!"; 2048 } else { 2049 # if file does not exist, do not expand include or input command (do not warn if fname contains #[0-9] as it is then likely part of a command definition 2050 # and is not meant to be expanded directly 2051 print STDERR "WARNING: Could not find included file ",$importfilepath,". I will continue but not expand |$2|\n"; 2052 $replacement=(defined($3)? "\\subimport" : "\\import"); 2053 $replacement .= "{$subdir}{$fname} % Processed"; 2054 } 2055 "$begline$replacement"; 2056 }/exgm; 2057 2058 # recursively replace \\input and \\include files 2059 $text =~ s/(^(?:[^%\n]|\\%)*)(\\input\{(.*?)\}|\\include\{(${includeonly}(?:\.tex)?)\})/{ 2060 $begline=(defined($1)? $1 : "") ; 2061 $inputcmd=$2; 2062 $fname = $3 if defined($3) ; 2063 $fname = $4 if defined($4) ; 2064 $newpage=(defined($4)? 
" \\newpage " : "") ; 2065 # # add tex extension unless there is a three or four letter extension already 2066 $fname .= ".tex" unless $fname =~ m|\.\w{3,4}$|; 2067 $fullfile = File::Spec->catfile($dirname,$fname); 2068 print STDERR "DEBUG Beg of line match |$1|\n" if defined($1) && $debug ; 2069 print STDERR "Include file $fname\n" if $verbose; 2070 print STDERR "DEBUG looking for file ",$fullfile, "\n" if $debug; 2071 # content of file becomes replacement value (use recursion), add \newpage if the command was include 2072 if ( -f $fullfile ) { 2073 # If file exists, replace input or include command with expanded input 2074 $replacement=flatten(read_file_with_encoding($fullfile, $encoding), $preamble,$filename,$encoding) or die "Could not open file ",$fullfile,": $!"; 2075 $replacement = remove_endinput($replacement); 2076 # \include always starts a new page; use explicit \newpage command to simulate this 2077 } else { 2078 # if file does not exist, do not expand include or input command (do not warn if fname contains #[0-9] as it is then likely part of a command definition 2079 # and is not meant to be expanded directly 2080 print STDERR "WARNING: Could not find included file ",$fullfile,". I will continue but not expand |$inputcmd|\n" unless $fname =~ m(#[0-9]) ; 2081 $replacement = $inputcmd ; # i.e. just the original command again -> make no change file does not exist 2082 $newpage=""; 2083 } 2084 "$begline$newpage$replacement$newpage"; 2085 }/exgm; 2086 2087 # replace bibliography with bbl file if it exists 2088 $text=~s/(^(?:[^%\n]|\\%)*)\\bibliography\{(.*?)\}/{ 2089 if ( -f $bblfile ){ 2090 $replacement=read_file_with_encoding(File::Spec->catfile($bblfile), $encoding); 2091 } else { 2092 warn "Bibliography file $bblfile cannot be found. No flattening of \\bibliography done. Run bibtex on old and new files first"; 2093 $replacement="\\bibliography{$2}"; 2094 } 2095 $begline=(defined($1)? 
$1 : "") ; 2096 "$begline$replacement"; 2097 }/exgm; 2098 2099 # replace subfile with contents (subfile package) 2100 $text=~s/(^(?:[^%\n]|\\%)*)\\subfile\{(.*?)\}/{ 2101 $begline=(defined($1)? $1 : "") ; 2102 $fname = $2; 2103 # # add tex extension unless there is a three or four letter extension already 2104 $fname .= ".tex" unless $fname =~ m|\.\w{3,4}|; 2105 print STDERR "Include file as subfile $fname\n" if $verbose; 2106 # content of file becomes replacement value (use recursion) 2107 # now strip away everything outside and including \begin{document} and \end{document} pair# 2108 # # note: no checking for comments is made 2109 $fullfile=File::Spec->catfile($dirname,$fname); 2110 if ( -f $fullfile) { 2111 # if file exists, expand \subfile command by contents of file 2112 $subfile=read_file_with_encoding($fullfile,$encoding) or die "Could not open included subfile ",$fullfile,": $!"; 2113 ($subpreamble,$subbody,$subpost)=splitdoc($subfile,'\\\\begin\{document\}','\\\\end\{document\}'); 2114 ### $subfile=~s|^.*\\begin{document}||s; 2115 ### $subfile=~s|\\end{document}.*$||s; 2116 $replacement=flatten($subbody, $preamble,$filename,$encoding); 2117 ### $replacement = remove_endinput($replacement); 2118 } else { 2119 # if file does not exist, do not expand subfile 2120 print STDERR "WARNING: Could not find subfile ",$fullfile,". I will continue but not expand |$2|\n" unless $fname =~ m(#[0-9]) ; 2121 $replacement = "\\subfile\{$2\}" ; # i.e. just the original command again -> make no change file does not exist 2122 } 2123 2124 "$begline$replacement"; 2125 }/exgm; 2126 2127 # replace \verbatiminput and \lstlistinginput 2128 $text=~s/(^(?:[^%\n]|\\%)*)\\(verbatiminput\*?|lstinputlisting)$extraspace(\[$brat_n\])?$extraspace\{(.*?)\}/{ 2129 $begline=(defined($1)? $1 : "") ; 2130 $command = $2 ; 2131 $fname = $4 ; 2132 $verboptions = defined($3)? 
# print_regex_arr(@arr)
# prints the elements of @arr to stdout on one line, separated by single spaces.
# Compiled regexes of the form qr/^ref$/ are shown as plain ref, i.e. without the wrapper
# that perl adds when turning a regex into a string and without the ^ and $ anchors.
sub print_regex_arr {
  my $dumstring;
  $dumstring = join(" ",@_);
  # Older perls turn qr/^ref$/ into the string (?-xism:^ref$); perl 5.14 and later
  # produce (?^:^ref$), possibly with flag letters after the caret such as (?^u:^ref$).
  # Remove either wrapper together with the ^,$ marks before output.
  $dumstring =~ s/\(\?(?:-xism|\^\w*):\^(.*?)\$\)/$1/g;
  print $dumstring,"\n";
}
# @lines=extrapream($type,...)
# collects preamble lines for every $type given and returns them as one list
#  - if $type is the name of an existing file (or /dev/null), the lines of that file are
#    returned; ' %DIF PREAMBLE' is appended to each line not already containing %DIF PREAMBLE
#  - otherwise the appendix of this script (DATA handle, i.e. the text after the __END__
#    token) is searched for a line starting with "%DIF $TYPE"; that line and everything up to,
#    but excluding, the line starting with '%DIF END $TYPE' is returned, with ' %DIF PREAMBLE'
#    appended to each line (where $TYPE is upcased version of $type)
# If no matching line is found in the appendix, an error message is printed and the program
# exits with status 2.
# extrapream('-nofail',$type) will---instead of failing---simply return nothing if
# it does not find the matching line in the appendix (do not use -nofail option with multiple types!)
sub extrapream {
  my @types=@_;
  my ($type,$arg,$line);
  my $nofail=0;
  ###my @retval=("%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF") ;
  my @retval=();
  my ($copy);

  foreach $arg ( @types ) {
    if ( $arg eq '-nofail' ) {
      $nofail=1;
      next;
    }
    $type=$arg;
    $copy=0;
    if ( -f $type || lc $type eq '/dev/null' ) {
      # 3-argument open on a lexical handle: the name is opened exactly as it was tested
      # with -f above, and the handle can be closed again once the file has been read
      open (my $preamblefh,"<",$type) or die "Cannot open preamble file $type: $!";
      print STDERR "Reading preamble file $type\n" if $verbose ;
      # lines are read into a lexical variable so that the caller's $_ is not overwritten
      while ( defined($line=<$preamblefh>) ) {
        chomp $line;
        if ( $line =~ m/%DIF PREAMBLE/ ) {
          push (@retval,$line);
        } else {
          push (@retval,"$line %DIF PREAMBLE");
        }
      }
      close($preamblefh);
    } else {    # not (-f $type)
      $type=uc($type);   # upcase argument
      print STDERR "Preamble Internal Type $type\n" if $verbose;
      # save filehandle position (before first read this points to line after __END__)
      # but seek DATA,0,0 resets it to the beginning of the file
      # see https://stackoverflow.com/questions/4459601/how-can-i-use-data-twice
      my $data_start = tell DATA;
      while ( defined($line=<DATA>) ) {
        if ( $line =~ m/^%DIF $type/ ) {
          $copy=1;
        } elsif ( $line =~ m/^%DIF END $type/ ) {
          last;
        }
        chomp $line;
        push (@retval,"$line %DIF PREAMBLE") if $copy;
      }
      if ( $copy == 0 ) {
        unless ($nofail) {
          print STDERR "\nPreamble style $type not implemented.\n";
          print STDERR "Write latexdiff -h to get help with available styles\n";
          exit(2);
        }
      }
      seek DATA,$data_start,0;   # rewind DATA handle to beginning of data record
    }
  }
  ###push (@retval,"%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF") ;
  return @retval;
}
if ( $text =~ m/(^[^%]*)$word2/ms ); 2259 } 2260 return ($part1,$part2,$part3); 2261} 2262 2263 2264 2265 2266 2267# bodydiff($old,$new) 2268sub bodydiff { 2269 my ($oldwords, $newwords) = @_; 2270 my @retwords; 2271 2272 print STDERR "(",exetime()," s)\n","Splitting into latex tokens \n" if $verbose; 2273 print STDERR "Parsing $oldfile \n" if $verbose; 2274 my @oldwords = splitlatex($oldwords); 2275 print STDERR "Parsing $newfile \n" if $verbose; 2276 my @newwords = splitlatex($newwords); 2277 2278 if ( $debug ) { 2279 open(TOKENOLD,">","latexdiff.debug.tokenold"); 2280 print TOKENOLD join("***\n",@oldwords); 2281 close(TOKENOLD); 2282 open(TOKENNEW,">","latexdiff.debug.tokennew"); 2283 print TOKENNEW join("***\n",@newwords); 2284 close(TOKENNEW); 2285 } 2286 2287 print STDERR "(",exetime()," s)\n","Pass 1: Expanding text commands and merging isolated identities with changed blocks " if $verbose; 2288 pass1(\@oldwords, \@newwords); 2289 2290 2291 print STDERR "(",exetime()," s)\n","Pass 2: inserting DIF tokens and mark up. 
" if $verbose; 2292 if ( $debug ) { 2293 open(TOKENOLD,">","latexdiff.debug.tokenold2"); 2294 print TOKENOLD join("***\n",@oldwords); 2295 close(TOKENOLD); 2296 open(TOKENNEW,">","latexdiff.debug.tokennew2"); 2297 print TOKENNEW join("***\n",@newwords); 2298 close(TOKENNEW); 2299 } 2300 2301 @retwords=pass2(\@oldwords, \@newwords); 2302 2303 return(@retwords); 2304} 2305 2306 2307 2308 2309# @words=splitlatex($string) 2310# split string according to latex rules 2311# Each element of words is either 2312# a word (including trailing spaces and punctuation) 2313# a latex command 2314# if there is white space in the beginning return that as first token 2315sub splitlatex { 2316 my ($inputstring) = @_ ; 2317 my $string=$inputstring ; 2318 # if input is empty, return empty list 2319 length($string)>0 or return (); 2320 $string=~s/^(\s*)//s; 2321 my $leadin=$1; 2322 length($string)>0 or return ($leadin); 2323 2324 my @retval=($string =~ m/$pat/osg); 2325 2326 if (length($string) != length(join("",@retval))) { 2327 print STDERR "\nWARNING: Inconsistency in length of input string and parsed string:\n This often indicates faulty or non-standard latex code.\n In many cases you can ignore this and the following warning messages.\n Note that character numbers in the following are counted beginning after \\begin{document} and are only approximate." unless $ignorewarnings; 2328 print STDERR "DEBUG Original length ",length($string)," Parsed length ",length(join("",@retval)),"\n" if $debug; 2329 print STDERR "DEBUG Input string: |$string|\n" if (length($string)<500) && $debug; 2330 print STDERR "DEBUG Token parsing: |",join("+",@retval),"|\n" if (length($string)<500) && $debug ; 2331 @retval=(); 2332 # slow way only do this if other m//sg method fails 2333 my $last = 0; 2334 while ( $string =~ m/$pat/osg ) { 2335 my $match=$&; 2336 if ($last + length $& != pos $string ) { 2337 my $pos=pos($string); 2338 my $offset=30<$last ? 
30 : $last; 2339 my $dum=substr($string,$last-$offset,$pos-$last+2*$offset); 2340 my $dum1=$dum; 2341 my $cnt=$#retval; 2342 my $i; 2343 $dum1 =~ s/\n/ /g; 2344 unless ($ignorewarnings) { 2345 print STDERR "\n$dum1\n"; 2346 print STDERR " " x 30,"^" x ($pos-$last)," " x 30,"\n"; 2347 print STDERR "Missing characters near word " . (scalar @retval) . " character index: " . $last . "-" . pos($string) . " Length: " . length($match) . " Match: |$match| (expected match marked above).\n"; 2348 } 2349 # put in missing characters `by hand' 2350 push (@retval, substr($dum,$offset,$pos-$last-length($match))); 2351# Note: there seems to be a bug in substr with utf8 that made the following line output substr which were too long, 2352# using dum instead appears to work 2353# push (@retval, substr($string,$last, pos($string)-$last-length($match))); 2354 } 2355 push (@retval, $match); 2356 $last=pos $string; 2357 } 2358 2359 } 2360 2361 unshift(@retval,$leadin) if (length($leadin)>0); 2362 return @retval; 2363} 2364 2365 2366# pass1( \@seq1,\@seq2) 2367# Look for differences between seq1 and seq2. 2368# Where an common-subsequence block is flanked by deleted or appended blocks, 2369# and is shorter than $MINWORDSBLOCK words it is appended 2370# to the last deleted or appended word. If the block contains tokens other than words 2371# or punctuation it is not merged. 2372# Deleted or appended block consisting of words and safe commands only are 2373# also merged, to prevent break-up in pass2 (after previous isolated words have been removed) 2374# If there are commands with textual arguments (e.g. \caption) both in corresponding 2375# appended and deleted blocks split them such that the command and opening bracket 2376# are one token, then the rest is split up following standard rules, and the closing 2377# bracket is a separate token, ie. turn 2378# "\caption{This is a textual argument}" into 2379# ("\caption{","This ","is ","a ","textual ","argument","}") 2380# No return value. 
# Destructively changes sequences
#
# Implementation outline:
#   1. traverse_sequences() walks both token lists; the callbacks below collect
#      - unchanged runs (in $block/$cnt, anchored at the last changed token $last1/$last2)
#      - deleted and added tokens together with their indices ($delblock/$addblock)
#      and queue merge/split instructions in @$todo.
#   2. The instructions are carried out afterwards, addressing elements relative to the
#      end of the sequences.
#   An instruction has the form [ index1, index2, cnt, token... ] where an index of -1
#   means 'do not touch this sequence'.
sub pass1 {
  my $seq1 = shift ;
  my $seq2 = shift ;

  my $len1 = scalar @$seq1;
  my $len2 = scalar @$seq2;
  my $wpat=qr/^(?:[a-zA-Z.,'`:;?()!]*)[\s~]*$/;   #'

  my ($last1,$last2)=(-1,-1) ;
  my $cnt=0;
  my $block=[];
  my $addblock=[];
  my $delblock=[];
  my $todo=[];
  my ($i,$j);     # $j used to leak into the package namespace; now lexical like $i
  my (@delmid,@addmid,@dummy);

  my ($addcmds,$delcmds,$matchindex);
  my ($addtextblocks,$deltextblocks);
  my ($addtokcnt,$deltokcnt,$mattokcnt)=(0,0,0);
  my ($addblkcnt,$delblkcnt,$matblkcnt)=(0,0,0);

  # called at the first changed token after an unchanged run: decide whether the
  # run just finished is to be merged into the preceding changed token
  my $adddiscard = sub {
                      if ($cnt > 0 ) {
                        $matblkcnt++;
                        # just after an unchanged block
#                       print STDERR "Unchanged block $cnt, $last1,$last2 \n";
                        if ($cnt < $MINWORDSBLOCK
                            && $cnt==scalar (
                                     grep { /^$wpat/ || ( /^\\((?:[`'^"~=.]|[\w\d@*]+))((?:\[$brat_n\]|\{$pat_n\})*)/o
                                           && iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)
                                           && scalar(@dummy=split(" ",$2))<3 ) }
                                             @$block) )  {
                          # merge identical blocks shorter than $MINWORDSBLOCK
                          # and only containing ordinary words
                          # with preceding different word
                          # We cannot carry out this merging immediately as this
                          # would change the index numbers of seq1 and seq2 and confuse
                          # the algorithm, instead we store in @$todo where we have to merge
                          push(@$todo, [ $last1,$last2,$cnt,@$block ]);
                        }
                        $block = [];
                        $cnt=0; $last1=-1; $last2=-1;
                      }
                    };
  # token $_[0] of seq1 has no counterpart in seq2
  my $discard=sub { $deltokcnt++;
                    $adddiscard->();
                    push(@$delblock,[ $seq1->[$_[0]],$_[0] ]);
                    $last1=$_[0] };

  # token $_[1] of seq2 has no counterpart in seq1
  my $add =   sub { $addtokcnt++;
                    $adddiscard->();
                    push(@$addblock,[ $seq2->[$_[1]],$_[1] ]);
                    $last2=$_[1] };

  # token $_[0] of seq1 matches token $_[1] of seq2
  my $match = sub { $mattokcnt++;
                    if ($cnt==0) {   # first word of matching sequence after changed sequence or at beginning of word sequence
                      $deltextblocks = extracttextblocks($delblock);
                      $delblkcnt++ if scalar @$delblock;
                      $addtextblocks = extracttextblocks($addblock);
                      $addblkcnt++ if scalar @$addblock;

                      # make a list of all TEXTCMDLIST commands in deleted and added blocks
                      $delcmds = extractcommands($delblock);
                      $addcmds = extractcommands($addblock);
                      # now find those text commands, which are found in both deleted and added blocks, and expand them
                      # keygen(third argument of _longestCommonSubsequence) implies to sort on command (0th elements of $addcmd elements)
                      # the calling format for longestCommonSubsequence has changed between versions of
                      # Algorithm::Diff so we need to check which one we are using
                      if ( $algodiffversion  > 1.15 ) {
                        ### Algorithm::Diff 1.19
                        $matchindex=Algorithm::Diff::_longestCommonSubsequence($delcmds,$addcmds, 0, sub { $_[0]->[0] } );
                      } else {
                        ### Algorithm::Diff 1.15
                        $matchindex=Algorithm::Diff::_longestCommonSubsequence($delcmds,$addcmds, sub { $_[0]->[0] } );
                      }

                      for ($i=0 ; $i<=$#$matchindex ; $i++) {
                        if (defined($matchindex->[$i])){
                          $j=$matchindex->[$i];
                          @delmid=splitlatex($delcmds->[$i][3]);
                          @addmid=splitlatex($addcmds->[$j][3]);
                          # text blocks in front of the command have to be queued first
                          # (the local names no longer shadow the closure variables $block and $cnt)
                          while (scalar(@$deltextblocks)  && $deltextblocks->[0][0]<$delcmds->[$i][1]) {
                            my ($tbindex,$tbwords,$tbcnt)=@{ shift(@$deltextblocks) };
                            push(@$todo, [$tbindex,-1,$tbcnt,@$tbwords]);
                          }
                          push(@$todo, [ $delcmds->[$i][1],-1,-1,$delcmds->[$i][2],@delmid,$delcmds->[$i][4]]);

                          while (scalar(@$addtextblocks) && $addtextblocks->[0][0]<$addcmds->[$j][1]) {
                            my ($tbindex,$tbwords,$tbcnt)=@{ shift(@$addtextblocks) };
                            push(@$todo, [-1,$tbindex,$tbcnt,@$tbwords]);
                          }
                          push(@$todo, [ -1,$addcmds->[$j][1],-1,$addcmds->[$j][2],@addmid,$addcmds->[$j][4]]);
                        }
                      }
                      # mop up remaining textblocks
                      while (scalar(@$deltextblocks)) {
                        my ($tbindex,$tbwords,$tbcnt)=@{ shift(@$deltextblocks) } ;
                        push(@$todo, [$tbindex,-1,$tbcnt,@$tbwords]);
                      }
                      while (scalar(@$addtextblocks)) {
                        my ($tbindex,$tbwords,$tbcnt)=@{ shift(@$addtextblocks) };
                        push(@$todo, [-1,$tbindex,$tbcnt,@$tbwords]);
                      }

                      $addblock=[];
                      $delblock=[];
                    }
                    push(@$block,$seq2->[$_[1]]);
                    $cnt++  };

  # tokens are compared after normalising their white space
  my $keyfunc = sub { join(" ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );


  # now carry out the merging/splitting.  Refer to elements relative from
  # the end (with negative indices) as these offsets don't change before the instruction is executed
  # cnt>0: merged small unchanged groups with previous changed blocks
  # cnt==-1: split textual commands into components
  foreach my $instruction ( @$todo) {
    my ($index1,$index2,$count,@tokens)=@$instruction ;
    if ($count>=0) {
      splice(@$seq1,$index1-$len1,1+$count,join("",$seq1->[$index1-$len1],@tokens)) if $index1>=0;
      splice(@$seq2,$index2-$len2,1+$count,join("",$seq2->[$index2-$len2],@tokens)) if $index2>=0;
    } else {
      splice(@$seq1,$index1-$len1,1,@tokens) if $index1>=0;
      splice(@$seq2,$index2-$len2,1,@tokens) if $index2>=0;
    }
  }

  if ($verbose) {
    print STDERR "\n";
    print STDERR " $mattokcnt matching tokens in $matblkcnt blocks.\n";
    print STDERR " $deltokcnt discarded tokens in $delblkcnt blocks.\n";
    print STDERR " $addtokcnt appended tokens in $addblkcnt blocks.\n";
  }
}


# extracttextblocks(\@blockindex)
# $blockindex has the following format
# [ [ token1, index1 ], [token2, index2],.. ]
# where index refers to the index in the original old or new word sequence
# Returns: reference to an array of the form
# [[ $index, $textblock, $cnt ], ..
# where $index index of block to be merged
#       $textblock contains all the words to be merged with the word at $index (but does not contain this word)
#       $cnt is length of block
#
# A run of consecutive 'text-like' tokens (plain words/punctuation, or safe commands
# which are not text commands) forms one block: the first token of the run only
# supplies $index, the remaining tokens make up $textblock.  Runs consisting of a
# single token yield no entry.
#
# requires: iscmd
#
sub extracttextblocks {
  my $entries=shift;
  my $word_re=qr/^(?:[a-zA-Z.,'`:;?()!]*)[\s~]*$/;   #'

  # we redefine locally $extraspace (shadowing the global definition) to capture command sequences with intervening spaces no matter what the global setting
  # this is done so we can capture those commands with a predefined number of arguments without having to introduce them again explicitly here
  my $extraspace='\s*';

  my @found;          # finished blocks
  my @pending;        # tokens of the current run (without its first token)
  my $anchor=-1;      # index of the first token of the current run, -1: no run open

  # close the current run, recording it if anything follows its first token
  my $flush = sub {
    push(@found,[ $anchor, [ @pending ], scalar(@pending) ]) if @pending;
    @pending=();
    $anchor=-1;
  };

  foreach my $entry ( @$entries ) {
    my ($token,$index)=@$entry;
    my $is_text = ( $token =~ /$word_re/ )
               || ( $token =~/^\\((?:[`'^"~=.]|[\w\d@\*]+))((?:${extraspace}\[$brat_n\]${extraspace}|${extraspace}\{$pat_n\})*)/
                    && iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)
                    && !iscmd($1,\@TEXTCMDLIST,\@TEXTCMDEXCL) );
    unless ( $is_text ) {
      # it is not text: whatever run was open ends here
      $flush->();
      next;
    }
    # we have text or a command which can be treated as text
    if ( $anchor<0 ) {
      $anchor=$index;            # new pure-text block
    } else {
      push(@pending,$token);     # add to pure-text block
    }
  }
  # finish processing a possibly unfinished block before returning
  $flush->();
  return(\@found);
}



# extractcommands( \@blockindex )
# $blockindex has the following format
# [ [ token1, index1 ], [token2, index2],..
#   ]   (end of the $blockindex format description begun on the preceding line)
# where index refers to the index in the original old or new word sequence
# Returns: reference to an array of the form
# [ [ "\cmd1", index, "\cmd1[optarg]{arg1}{", "arg2" ,"} " ],..
# where index is just taken from input array
# command must have a textual argument as last argument
# (in the returned closing element the brace is replaced by the token \RIGHTBRACE)
#
# requires: iscmd
#
sub extractcommands {
  my $block=shift;
  my ($token,$index,$cmd,$open,$mid,$closing);
  my $retval=[];

  # we redefine locally $extraspace (shadowing the global definition) to capture command sequences with intervening spaces no matter what the global setting
  # this is done so we can capture those commands with a predefined number of arguments without having to introduce them again explicitly here
  my $extraspace='\s*';

  foreach my $entry ( @$block ) {
    ($token,$index)=@$entry;
    # check if token is an alphanumeric command sequence with at least one non-optional argument
    # \cmd[...]{...}{last argument}
    # Capturing in the following results in these associations
    # $1: \cmd[...]{...}{
    # $2: \cmd
    # $3: last argument
    # $4: }  + trailing spaces
    if ( ( $token =~ m/^(\\([\w\d\*]+)(?:${extraspace}\[$brat_n\]|${extraspace}\{$pat_n\})*${extraspace}\{)($pat_n)(\}\s*)$/so )
         && iscmd($2,\@TEXTCMDLIST,\@TEXTCMDEXCL) ) {
      print STDERR "DEBUG EXTRACTCOMMANDS Match |$1|$2|$3|$4|$index \n" if $debug;
      #      push(@$retval,[ $2,$index,$1,$3,$4 ]);
      ($cmd,$open,$mid,$closing) = ($2,$1,$3,$4) ;
      $closing =~ s/\}/\\RIGHTBRACE/ ;
      push(@$retval,[ $cmd,$index,$open,$mid,$closing ]);
    }
  }
  return $retval;
}

# iscmd($cmd,\@regexarray,\@regexexcl) checks
# return 1 if $cmd matches any of the patterns in the
# array $@regexarray, and none of the patterns in \@regexexcl, otherwise return 0
# (each pattern has to match the whole of $cmd)
sub iscmd {
  my ($cmd,$regexar,$regexexcl)=@_;
  my ($ret)=0;
  ### print STDERR "DEBUG: iscmd($cmd)=" if $debug;
  # lexical loop variables: the loops used to run over the package variable $pat,
  # i.e. they temporarily re-bound the tokenizer pattern used by splitlatex
  foreach my $re ( @$regexar ) {
    if ( $cmd =~ m/^${re}$/ ) {
      $ret=1 ;
      last;
    }
  }
  ### print STDERR "0\n" if ($debug && !$ret) ;
  return 0 unless $ret;
  foreach my $re ( @$regexexcl ) {
    ### print STDERR "0\n" if ( $debug && $cmd =~ m/^${re}$/) ;
    return 0 if ( $cmd =~ m/^${re}$/ );
  }
  ### print STDERR "1\n" if $debug;
  return 1;
}


# pass2( \@seq1,\@seq2)
# Look for differences between seq1 and seq2.
# Mark begin and end of deleted and appended sequences with tags $DELOPEN and $DELCLOSE
# and $ADDOPEN and $ADDCLOSE, respectively, however exclude { } & and all comands, unless
# they match an element of the whitelist (SAFECMD)
# For words in TEXTCMD but not in SAFECMD, enclose interior with $ADDOPEN and $ADDCLOSE brackets
# Deleted comment lines are marked with %DIF <
# Added comment lines are marked with %DIF >
# Returns: list of tokens of the marked-up text
sub pass2 {
  my $seq1 = shift ;
  my $seq2 = shift ;

  my ($addtokcnt,$deltokcnt,$mattokcnt)=(0,0,0);
  my ($addblkcnt,$delblkcnt)=(0,0);

  my $retval = [];
  my $delhunk = [];
  my $addhunk = [];

  my $discard = sub { $deltokcnt++;
                      push ( @$delhunk, $seq1->[$_[0]]) };

  my $add =     sub { $addtokcnt++;
                      push ( @$addhunk, $seq2->[$_[1]]) };

  # write out the collected deleted/added tokens (deleted first).  Used both in
  # front of every matching token and after the traversal, so that hunks at the
  # very end are counted in the statistics, too
  my $flush =   sub {
                      if ( scalar @$delhunk ) {
                        $delblkcnt++;
                        # mark up changes, but comment out commands
                        push @$retval,marktags($DELMARKOPEN,$DELMARKCLOSE,$DELOPEN,$DELCLOSE,$DELCMDOPEN,$DELCMDCLOSE,$DELCOMMENT,$delhunk);
                        $delhunk = [];
                      }
                      if ( scalar @$addhunk ) {
                        $addblkcnt++;
                        # we mark up changes, but simply quote commands
                        push @$retval,marktags($ADDMARKOPEN,$ADDMARKCLOSE,$ADDOPEN,$ADDCLOSE,"","",$ADDCOMMENT,$addhunk);
                        $addhunk = [];
                      }
                    };

  my $match =   sub { $mattokcnt++;
                      $flush->();
                      push(@$retval,$seq2->[$_[1]]) };

  my $keyfunc = sub { join(" ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );
  # clear up unprocessed hunks
  $flush->();


  if ($verbose) {
    print STDERR "\n";
    print STDERR " $mattokcnt matching tokens. \n";
    print STDERR " $deltokcnt discarded tokens in $delblkcnt blocks.\n";
    print STDERR " $addtokcnt appended tokens in $addblkcnt blocks.\n";
  }
  return(@$retval);
}

# marktags($openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment,\@block)
# returns ($openmark,$open,$block,$close,$closemark) if @block contains no commands (except white-listed ones),
# braces, ampersands, or comments
# mark comments with $comment
# exclude all other exceptions from scope of open, close like this
# ($openmark, $open,...,$close, $opencmd,command, command,$closecmd, $open, ..., $close, $closemark)
# If $opencmd begins with "%" marktags assumes it is operating on a deleted block, otherwise on an added block
# Note: @block is re-tokenised in place (the array passed in is modified)
sub marktags {
  my ($openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment,$block)=@_;
  my $word;
  my (@argtext);
  my $retval=[];
  my $noncomment=0;
  my $cmd=-1;    # -1 at beginning 0: last token written is a ordinary word
                 # 1: last token written is a command
                 # for keeping track whether we are just in a command sequence or in a word sequence
  my $cmdcomment= ($opencmd =~ m/^%/);  # Flag to indicate whether opencmd is a comment (i.e. if we intend to simply comment out changed commands)
  my ($command,$commandword,$closingbracket) ; # temporary variables needed below to remember sub-pattern matches



# split this block to split sequences joined in pass1
  @$block=splitlatex(join "",@$block);
  ### print STDERR "DEBUG: marktags $openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment\n" if $debug;
  ### print STDERR "DEBUG: marktags blocksplit ",join("|",@$block),"\n" if $debug;

  # we redefine locally $extraspace (shadowing the global definition) to capture command sequences with intervening spaces no matter what the global setting
  # this is done so we can capture those commands with a predefined number of arguments without having to introduce them again explicitly here
  my $extraspace_mt='\s*';


  foreach (@$block) {
    $word=$_;
    if ( $word =~ s/^%/%$comment/ ) {
      # a comment
      if ($cmd==1) {
        push (@$retval,$closecmd) ;
        $cmd=-1;
      }
      push (@$retval,$word);
      next;
    }
    if ( $word =~ m/^\s*$/ ) {
      ### print STDERR "DEBUG MARKTAGS: whitespace detected |$word| cmdcom |$cmdcomment| |$opencmd|\n" if $debug;
      # a sequence of white-space characters - this should only ever happen for the first element of block.
      # in deleted block, omit, otherwise just copy it in
      if ( ! $cmdcomment) {   # ignore in deleted blocks
        push(@$retval,$word);
      }
      next;
    }
    if (! $noncomment) {
      push (@$retval,$openmark);
      $noncomment=1;
    }
    # negative lookahead pattern (?!) in second clause is put in to avoid matching \( .. \) patterns
    # also note that second pattern will match \\
    if ( $word =~ /^[&{}\[\]]/ || ( $word =~ /^\\(?!\()(\\|[`'^"~=.]|[\w*@]+)/ &&  !iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)) ) {
      ###    if ( $word =~ /^[&{}\[\]]/ || ( $word =~ /^\\([\w*@\\% ]+)/ && !iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)) ) {
      # word is a command or other significant token (not in SAFECMDLIST)
      ## same conditions as in subroutine extractcommand:
      # check if token is an alphanumeric command sequence with at least one non-optional argument
      # \cmd[...]{...}{last argument}
      # Capturing in the following results in these associations
      # $1: \cmd[...]{...}{
      # $2: cmd
      # $3: last argument
      # $4: }  + trailing spaces
      ### pre-0.3    if ( ( $token =~ m/^(\\([\w\d\*]+)(?:\[$brat0\]|\{$pat_n\})*\{)($pat_n)(\}\s*)$/so )
      if ( ( $word =~ m/^(\\([\w\d\*]+)(?:${extraspace_mt}\[$brat_n\]|${extraspace_mt}\{$pat_n\})*${extraspace_mt}\{)($pat_n)(\}\s*)$/so )
           && (iscmd($2,\@TEXTCMDLIST,\@TEXTCMDEXCL)|| iscmd($2,\@MATHTEXTCMDLIST,\@MATHTEXTCMDEXCL))
           && ( !$cmdcomment || !iscmd($2,\@CONTEXT2CMDLIST, \@CONTEXT2CMDEXCL) )  ) {
        # Condition 1: word is a command? - if yes, $1,$2,.. will be set as above
        # Condition 2: word is a text command - we mark up the interior of the word. There is a separate check for MATHTEXTCMDLIST
        #              because for $mathmarkup=WHOLE, the commands should not be split in pass1 (ie. math mode commands are not in
        #              TEXTCMDLIST, but the interior of MATHTEXT commnds should be highlighted in both deleted and added blocks
        # Condition 3: But if we are in a deleted block ($cmdcomment=1) and
        #            $2 (the command) is in context2, just treat it as an ordinary command (i.e. comment it open with $opencmd)
        # Because we do not want to disable this command
        # here we do not use $opencmd and $closecmd($opencmd is empty)
        if ($cmd==1) {
          push (@$retval,$closecmd) ;
        } elsif ($cmd==0) {
          push (@$retval,$close) ;
        }
        $command=$1; $commandword=$2; $closingbracket=$4;
        @argtext=splitlatex($3);   # split textual argument into tokens
        # and mark it up (but we do not need openmark and closemark)
        # insert command with initial arguments, marked-up final argument, and closing bracket
        if ( $cmdcomment && iscmd($commandword,\@CONTEXT1CMDLIST, \@CONTEXT1CMDEXCL) ) {
          # context1cmd in a deleted environment; delete command itself but keep last argument, marked up
          push (@$retval,$opencmd);
          $command =~ s/\n/\n${opencmd}/sg ; # repeat opencmd at the beginning of each line
          # argument, note that the additional comment character is included
          # to suppress linebreak after opening parentheses, which is important
          # for latexrevise
          push (@$retval,$command,"%\n{$AUXCMD\n",marktags("","",$open,$close,$opencmd,$closecmd,$comment,\@argtext),$closingbracket);
        } elsif ( iscmd($commandword,\@MATHTEXTCMDLIST, \@MATHTEXTCMDEXCL) ) {
          # MATHBLOCK pseudo command: consider all commands safe, except & and \\
          # Keep these commands even in deleted blocks, hence set $opencmd and $closecmd (5th and 6th argument of marktags) to
          # ""
          local @SAFECMDLIST=(".*");
          local @SAFECMDEXCL=('\\','\\\\',@UNSAFEMATHCMD);
          push(@$retval,$command,marktags("","",$open,$close,"","",$comment,\@argtext)#@argtext
               ,$closingbracket);
        } else {
          # normal textcmd or context1cmd in an added block
          push (@$retval,$command,marktags("","",$open,$close,$opencmd,$closecmd,$comment,\@argtext),$closingbracket);
        }
        push (@$retval,$AUXCMD,"\n") if $cmdcomment ;
        $cmd=-1 ;
      } elsif ( $cmdcomment &&
                ( $word =~ m/^(\\([\w\d\*]+)(?:${extraspace_mt}\[$brat_n\]|${extraspace_mt}\{$pat_n\})*${extraspace_mt}\{)($pat_n)(\}\s*)/so )
                && iscmd($2,\@KEEPCMDLIST, \@KEEPCMDEXCL) ) {
        # 'keepcmd' in a deleted environment: keep the command as is
        push (@$retval,$close) if $cmd==0 ;
        push (@$retval,$word);
        $cmd=-1;    # pretend we are at the beginning of a sequence because we do not want to add an additional $closecmd or $close before the next token, no matter what it is
      } else {
        # ordinary command
        push (@$retval,$opencmd) if $cmd==-1 ;
        push (@$retval,$close,$opencmd) if $cmd==0 ;
        $word =~ s/\n/\n${opencmd}/sg if $cmdcomment ;   # if opencmd is a comment, repeat this at the beginning of every line
        ### print STDERR "MARKTAGS: Add command |$word|\n";
        push (@$retval,$word);
        $cmd=1;
      }
    } else {
      ###print STDERR "DEBUG MARKTAGS is an ordinary word or SAFECMD command \n" if $debug;
      # just an ordinary word or command in SAFECMD
      push (@$retval,$open) if $cmd==-1 ;
      push (@$retval,$closecmd,$open) if $cmd==1 ;
      ###TODO:  check here if it is a command in MBOXCMD list, and surround it with \mbox{...}
      ### $word =~ /^\\(?!\()(\\|[`'^"~=.]|[\w*@]+)/ &&  iscmd($1,\@MBOXCMDLIST,\@MBOXCMDEXCL))
      ### but actually this check has been carried out already so can simply check if word begins with backslash
      if ( $word =~ /^\\(?!\()(\\|[`'^"~=.]|[\w*@]+)(.*?)(\s*)$/s &&  iscmd($1,\@MBOXCMDLIST,\@MBOXCMDEXCL)) {
        # $word is a safe command in MBOXCMDLIST
        ###print STDERR "DEBUG Mboxsafecmd detected:$word:\n" if $debug ;
        push(@$retval,"\\mbox{$AUXCMD\n\\" . $1 . $2 . $3 ."}\\hspace{0pt}$AUXCMD\n" );
      } else {
        # $word is a normal word or a safe command (not in MBOXCMDLIST)
        push (@$retval,$word);
      }
      $cmd=0;
    }
  }
  push (@$retval,$close) if $cmd==0;
  push (@$retval,$closecmd) if $cmd==1;

  push (@$retval,$closemark) if ($noncomment);
  return @$retval;
}

#used in preprocess
# operates on $_ (in place)
sub take_comments_and_newline_from_frac() {
  # some special magic for common usage of frac, which does not conform to the latexdiff requirements but can be made to fit
  # note that this is a rare exception to the general rule that the new tex can be reconstructed from the diff file

  # \frac12 -> \frac{1}{2}
  s/\\frac(\d)(\w)/\\frac\{$1\}\{$2\}/g;

  # \frac1{2b} -> \frac{1}{2b}
  s/\\frac(\d)/\\frac\{$1\}/g;

  # delete space and comment characters between \frac arguments
#  s/\\frac(?:\s*?%[^\n]*?)*?(\{$pat_n\})\s*(\{$pat_n\})/\\frac$1$2/g;
  s/\\frac(?:\s|%[^\n]*?)*(\{$pat_n\})(?:\s|%[^\n]*?)*(\{$pat_n\})/\\frac$1$2/g;
}

# preprocess($string, ..)
# carry out the following pre-processing steps for all arguments:
# 1. Remove leading white-space
#    Change \{ to \QLEFTBRACE and \} to \QRIGHTBRACE and \& to \AMPERSAND
# #.   Change {,} in comments to \CLEFTBRACE, \CRIGHTBRACE
# 2. mark all first empty line (in block of several) with \PAR tokens
# 3. Convert all '\%' into '\PERCENTAGE ' and all '\$' into \DOLLAR  to make parsing regular expressions easier
# 4. Convert all \verb|some verbatim text| commands (where | can be an arbitrary character)
#    into \verb{hash}  (also lstinline)
# 5. Convert \begin{verbatim} some verbatim text \end{verbatim} into \verbatim{hash} (not only verbatim, all patterns matching VERBATIMENV)
# 6. Convert _n into \SUBSCRIPTNB{n} and _{nnn} into \SUBSCRIPT{nn}
# 7. Convert ^n into \SUPERSCRIPTNB{n} and ^{nnn} into \SUPERSCRIPT{nn}
# 8. a.
Convert $$ $$ into \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} 2882# b. Convert \[ \] into \begin{SQUAREBRACKET} \end{SQUAREBRACKET} 2883# 9. Convert all picture environmentent (\begin{PICTUREENV} .. \end{PICTUREENV} \PICTUREBLOCKenv 2884# For math-mode COARSE,WHOLE or NONE option -convert all \begin{MATH} .. \end{MATH} 2885# into \MATHBLOCKmath{...} commands, where MATH/math is any valid math environment 2886 2887# 10. Add final token STOP to the very end. This is put in because the algorithm works better if the last token is identical. This is removed again in postprocessing. 2888# 2889# NB: step 6 and 7 is likely to convert some "_" inappropriately, e.g. in file 2890# names or labels but it does not matter because they are converted back in the postprocessing step 2891# Returns: leading white space removed in step 1 2892sub preprocess { 2893 for (@_) { 2894 2895 2896 # change in \verb and similar commands - note that I introduce an extra space here so that the 2897 # already hashed variants do not trigger again 2898 # transform \lstinline{...} 2899# s/\\lstinline(\[$brat0\])?(\{(?:.*?)\})/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg; 2900# s/\\lstinline(\[$brat0\])?((\S).*?\2)/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg; 2901 s/\\lstinline((?:\[$brat_n\])?)(\{(?:.*?)\})/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg; 2902 s/\\lstinline((?:\[$brat_n\])?)(([^\s\w]).*?\3)/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg; 2903 s/\\(verb\*?|lstinline)([^\s\w])(.*?)\2/"\\DIF${1}{". 
tohash(\%verbhash,"${2}${3}${2}") ."}"/esg; 2904 2905 # Change \{ to \QLEFTBRACE, \} to \QRIGHTBRACE, and \& to \AMPERSAND 2906 s/(?<!\\)\\\{/\\QLEFTBRACE /sg; 2907 s/(?<!\\)\\\}/\\QRIGHTBRACE /sg; 2908 s/(?<!\\)\\&/\\AMPERSAND /sg; 2909# replace {,} in comments with \\CLEFTBRACE,\\CRIGHTBRACE 2910 1 while s/((?<!\\)%.*)\{(.*)$/$1\\CLEFTBRACE $2/mg ; 2911 1 while s/((?<!\\)%.*)\}(.*)$/$1\\CRIGHTBRACE $2/mg ; 2912 s/(?<!\\)\\%/\\PERCENTAGE /g ; # (?<! is negative lookbehind assertion to prevent \\% from being converted 2913 s/(?<!\\)\\\$/\\DOLLAR /g ; # (?<! is negative lookbehind assertion to prevent \\$ from being converted 2914 s/\\begin\{($VERBATIMENV)\}(.*?)\\end\{\1\}/"\\${1}{". tohash(\%verbhash,"${2}") . "}"/esg; 2915 s/\\begin\{($VERBATIMLINEENV)\}(.*?)\\end\{\1\}/"\\begin{$1}". linecomment($2) . "\\end{$1}"/esg; 2916 2917 # mark all first empty line (in block of several) with \PAR tokens 2918 s/\n(\s*?)\n((?:\s*\n)*)/\n$1\\PAR\n$2/g ; 2919 # Convert _n or _\cmd into \SUBSCRIPTNB{n} or \SUBSCRIPTNB{\cmd} and _{nnn} into \SUBSCRIPT{nn} 2920 1 while s/(?<!\\)_(\s*([^{\\\s]|\\\w+))/\\SUBSCRIPTNB{$1}/g ; 2921 1 while s/(?<!\\)_(\s*{($pat_n)})/\\SUBSCRIPT$1/g ; 2922 # Convert ^n into \SUPERSCRIPTNB{n} and ^{nnn} into \SUPERSCRIPT{nn} 2923 1 while s/(?<!\\)\^(\s*([^{\\\s]|\\\w+))/\\SUPERSCRIPTNB{$1}/g ; 2924 1 while s/(?<!\\)\^(\s*{($pat_n)})/\\SUPERSCRIPT$1/g ; 2925 # Convert \sqrt{n} into \SQRT{n} and \sqrt nn into SQRTNB{nn} 2926 1 while s/(?<!\\)\\sqrt(\s*([^{\\\s]|\\\w+))/\\SQRTNB{$1}/g ; 2927 1 while s/(?<!\\)\\sqrt(\s*{($pat_n)})/\\SQRT$1/g ; 2928 # Convert $$ $$ into \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} 2929 s/\$\$(.*?)\$\$/\\begin{DOLLARDOLLAR}$1\\end{DOLLARDOLLAR}/sg; 2930 # Convert \[ \] into \begin{SQUAREBRACKET} \end{SQUAREBRACKET} 2931 s/(?<!\\)\\\[/\\begin{SQUAREBRACKET}/sg; 2932 s/\\\]/\\end{SQUAREBRACKET}/sg; 2933 # Convert all picture environmentent (\begin{PICTUREENV} .. 
# \end{PICTUREENV} \PICTUREBLOCKenv
    s/\\begin\{($PICTUREENV)}(.*?)\\end\{\1}/\\PICTUREBLOCK$1\{$2\}/sg;
    # For math-mode COARSE,WHOLE or NONE option -convert all \begin{MATH} .. \end{MATH}
    # into \MATHBLOCKMATH{...} commands, where MATH is any valid math environment
    # Also convert all array environments into ARRAYBLOCK environments

    if ( $mathmarkup != FINE ) {
      # DIFANCHORARRB and DIFANCHORARRE, DIFANCHORMATHB and DIFANCHORMATHE markers are inserted here to encourage the matching algorithm
      # to always match up the closing brace. Otherwise sometimes one ends up with a situation where
      # the closing brace is deleted and added at another point. The deleted closing brace is then
      # prevented by a %DIFDELCMD, leading to material leaking in or out of the math environment.
      # The anchors are removed in post-processing again. (note that they are simple text to cause least amount of complications
      # Admittedly, this is something of a hack and will not always work. If it does not, then one needs to
      # resort to WHOLE or FINE, or NONE math mode processing.
      s/\\begin\{($ARRENV)}(.*?)\\end\{\1}/\\ARRAYBLOCK$1\{$2\\DIFANCHORARRB \}\\DIFANCHORARRE /sg;

      take_comments_and_newline_from_frac();

      s/\\begin\{($MATHENV|$MATHARRENV|SQUAREBRACKET)\}(.*?)\\end\{\1\}/\\MATHBLOCK$1\{$2\\DIFANCHORMATHB \}\\DIFANCHORMATHE /sg;
    }

    # add final token " STOP"
    $_ .= " STOP"
  }
}


# $expanded=linecomment($string)
# Preface all lines of $string with the verbatim marker comment "%$VERBCOMMENT" (usually DIFVRB).
#   $string : body of a line-oriented verbatim environment (text between \begin{..} and \end{..})
# Returns the marked-up body, always terminated by a newline.
# On the first line a leading optional argument [..] and surrounding white space are kept
# as they are; only the remainder of that line (if any) is prefixed with the marker.
sub linecomment {
  my @verbatimlines=split("\n",$_[0]);
  # split drops trailing empty fields, so an empty (or newline-only) body yields an empty list.
  # Work on one empty line in that case instead of on an undefined first element
  # (the returned text is the same, "\n", but no 'uninitialized value' warnings are raised)
  @verbatimlines=("") unless @verbatimlines;
  # the first line needs special treatment - we do want to retain optional arguments as is but wrap the remainder also with VERBCOMMENT
  ### print STDERR "DEBUG: before verbatimlines[0] = ",$verbatimlines[0],"\n";
  $verbatimlines[0]=~s/^((?:\s*\[$brat_n\])?\s*)([^\s\[].*)/ defined($2) ? ( "$1\%$VERBCOMMENT$2" ) : ( $1 )/e;
  ### print STDERR "DEBUG: after verbatimlines[0] = ",$verbatimlines[0],"\n";
  # all following lines receive the marker via the join separator
  return(join("\n%$VERBCOMMENT",@verbatimlines)."\n");
}

# $simple=reverselinecomment($env, $string)
# remove DIFVRB comments but leave changed lines marked
#   $env    : name of the verbatim environment to wrap the result in
#   $string : body of the environment as it appears in the diff output
# Returns the complete environment (\begin{$env}...\end{$env}). If any line carried change
# markup, the option alsolanguage=DIFcode is added to the optional argument (or a new
# optional argument is created) and the environment is enclosed in \DIFmodbegin .. \DIFmodend
sub reverselinecomment {
  my ($environment, $verbatimtext)=@_;
  ###print STDERR "OLD VERBATIMTEXT: |$verbatimtext|\n";
  # remove markup added by latexdiff
  # (this should occur only if the type of verbatim environment was changed)
  # (note that this destroys some information in old file)
  # in theory I could save it by moving it out of the verbatim environment
  # but this requires more bookkeeping and is probably not necessary)
  $verbatimtext =~ s/\\DIFaddbegin //g;
  $verbatimtext =~ s/\\DIFaddend //g;
  $verbatimtext =~ s/\\DIFdelbegin //g;
  $verbatimtext =~ s/\\DIFdelend //g;
  $verbatimtext =~ s/$DELCMDOPEN.*//g;

  # remove DIFVRB mark
  $verbatimtext=~ s/%$VERBCOMMENT//g;

  # remove part of the markup in changed lines
  # if any of these substitution was made, then there was at least
  # one changed line, and we have to extend the style
  # (unchanged lines had their complete "%$VERBCOMMENT" prefix removed just above, so any
  # marker still present here is preceded by further text, i.e. belongs to a changed line)
  if ( $verbatimtext=~ s/$VERBCOMMENT//g ) {
    # in the next line we add ~alsolanguage~ modifier, but also deletes the rest of the line after the optional argument, as lstlisting commands gets sometimes
    # very confused by what is there (and othertimes seems to ignore this anyway)
    unless ( $verbatimtext =~ s/^(\s*)\[($brat_n)\](.*)\n/$1\[$2,alsolanguage=DIFcode\]\n/ ) {
      # no optional argument present: create one, taking care not to introduce an additional empty line
      if ( $verbatimtext =~ m/^\s*\n/ ) {
        $verbatimtext = "[alsolanguage=DIFcode]" . $verbatimtext;
      } else {
        $verbatimtext = "[alsolanguage=DIFcode]\n" . $verbatimtext;
      }
    }
    # There is a bug in listings package (at least v1.5b) for empty comments where the actual comment command is not made invisible
    # I therefore have to introduce an artificial '-' character at the end of empty added or deleted lines
    $verbatimtext =~ s/($DELCOMMENT\s*)$/$1-/mg;
    $verbatimtext = "\\DIFmodbegin\n\\begin{${environment}}${verbatimtext}\\end{${environment}}\n\\DIFmodend"
  } else {
    $verbatimtext = "\\begin{${environment}}${verbatimtext}\\end{${environment}}"
  }
  ###print STDERR "NEW VERBATIMTEXT: |$verbatimtext|\n";
  return($verbatimtext);
}


#hashstring=tohash(\%hash,$string)
# creates a hash value based on string and stores in %hash
#   \%hash  : reference to the hash in which $string is stored under the generated key
#   $string : text to be stored
# Returns the key, the decimal representation of a position-weighted sum of the character
# values of $string. The values are unpacked as signed chars, so the key can be negative.
# If the key is already taken by a different string, further passes over the string are
# added until a free key (or the key already holding the same string) is found.
sub tohash {
  my ($hash,$string)=@_;
  my (@arr,$val);
  my ($sum,$i)=(0,1);
  my ($hstr,$prevsum);
  my $retry=0;

  @arr=unpack('c*',$string);

  while (1) {
    $prevsum=$sum;
    foreach $val (@arr) {
      $sum += $i*$val;
      $i++;
    }
    # A repeated pass that leaves the sum unchanged (empty string, or only zero-valued characters)
    # would hit the same occupied slot forever; in that case simply step to the next integer
    $sum++ if ( $retry && $sum == $prevsum );
    $hstr= "$sum";
    last unless (defined($hash->{$hstr}) && $string ne $hash->{$hstr});
    # else found a duplicate HASH need to repeat for a higher hash value
    $retry=1;
  }
  $hash->{$hstr}=$string;
  ### print STDERR "Hash:$hstr: Content:$string:\n";
  return($hstr);
}

#string=fromhash(\%hash,$fromstring)
# restores string value stored in hash
#string=fromhash(\%hash,$fromstring,$prependstring)
# additionally begins each line with prependstring
#   \%hash         : reference to the hash to look up
#   $fromstring    : key of the stored text
#   $prependstring : (optional) text inserted at the beginning of every line of the result
# NOTE(review): for a key which is not present in the hash the lookup yields an undefined
# value - presumably keys always originate from tohash; verify against callers
sub fromhash {
  my ($hash,$hstr)=($_[0],$_[1]);
  my $retstr=$hash->{$hstr};
  # a third argument was passed: prefix every line with it
  if ( $#_ >= 2) {
    $retstr =~ s/^/$_[2]/mg;
  }
  return $retstr;
}

# writedebugfile(string, label)
# if $debug set writes <string> to file latexdiff.debug.<label>
# otherwise do nothing
# Failure to open or close the debug file is reported as a warning only, as the debug
# output is not required for the actual processing
sub writedebugfile {
  my ($string,$label)=@_;
  return unless ( $debug );

  my $debugfile="latexdiff.debug." . $label;
  my $fh;
  unless ( open($fh,">",$debugfile) ) {
    warn "WARNING: Cannot open debug file $debugfile for writing: $!\n";
    return;
  }
  print $fh $string;
  # errors of buffered writes only surface when the file is closed
  close($fh) or warn "WARNING: Error while closing debug file $debugfile: $!\n";
  return;
}


# postprocess($string, ..)
# carry out the following post-processing steps for all arguments:
# (the strings are modified in place, as $_ is aliased to the elements of @_ in the loop below)
# (Note: the numbering of the steps is historical and does not strictly reflect the order in
#  which they are executed in the code, e.g. hashes are expanded before SQUAREBRACKET is converted)
# * change $ in comments to the harmless marker DOLLARDIF (undone again further below)
# * Remove STOP token from the end
# * Replace \RIGHTBRACE by }
# * change citation commands within comments to protect from processing (using marker CITEDIF)
#   (NOTE(review): no CITEDIF handling is visible in this routine - presumably obsolete, to be confirmed)
# 1. Check all deleted blocks:
#    a.where a deleted block contains a matching \begin and
#      \end environment (these will be disabled by a %DIFDELCMD statements), for selected environments enable
#      these commands again (such that for example displayed math in a deleted equation
#      is properly within math mode. For math mode environments replace numbered equation
#      environments with their display only variety (so that equation numbers in new file and
#      diff file are identical). Where the correct type of math environment cannot be determined
#      use a place holder MATHMODE
#    b.where one of the commands matching $COUNTERCMD is used as a DIFAUXCMD, add a statement
#      subtracting one from the respective counter to keep numbering consistent with new file
#      Replace all MATHMODE environment commands by the correct environment to achieve matching
#      pairs
#    c. Convert MATHBLOCKmath commands to their uncounted numbers (e.g. convert equation -> displaymath
#       (environments defined in $MATHENV will be replaced by $MATHREPL, and environments in $MATHARRENV
#       will be replaced by $MATHARRREPL
#    d. If in-line math mode contains array environment, enclose the whole environment in \mbox'es
#    e. place \cite commands in mbox'es (for UNDERLINE style)
#
#    For added blocks:
#    c. If in-line math mode contains array environment, enclose the whole environment in \mbox'es
#    d. place \cite commands in mbox'es (for UNDERLINE style)
#
# 2. If math-mode COARSE,WHOLE or NONE option set: Convert \MATHBLOCKmath{..} commands back to environments
#
#    Convert all PICTUREblock{..} commands back to the appropriate environments
# 3. Convert DIFadd, DIFdel, DIFaddbegin , ... into FL varieties
#    within floats (currently recognised float environments: plate,table,figure
#    plus starred varieties).
# 4. Remove empty %DIFDELCMD < lines
# 4. Convert \begin{SQUAREBRACKET} \end{SQUAREBRACKET} into \[ \]
#    Convert \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} into $$ $$
# 5. Convert \SUPERSCRIPTNB{n} into ^n  and  \SUPERSCRIPT{nn} into ^{nnn}
# 6. Convert \SUBSCRIPTNB{n} into _n  and  \SUBSCRIPT{nn} into _{nnn}
# 7. Expand hashes of verb and verbatim environments
# 8. Convert '\PERCENTAGE ' back into '\%' and '\DOLLAR ' into '\$'
# 9. remove all \PAR tokens
# 10. package specific processing:  endfloat: make sure \begin{figure} and \end{figure} are always
#      on a line by themselves, similarly for table environment
#  4, undo renaming of the \begin, \end,{,}  in comments
#    Change \QLEFTBRACE, \QRIGHTBRACE,\AMPERSAND to \{,\},\&
#
# Note have to manually synchronize substitution commands below and
# DIF.. command names in the header
sub postprocess {
  # position, length and content of the block currently being modified ($float is also used for float environments)
  my ($begin,$len,$cnt,$float,$delblock,$addblock);
  # second level blocks
  my ($begin2,$cnt2,$len2,$eqarrayblock,$mathblock);

  # variables for tracking the nesting of list environments
  my (@textparts,@newtextparts,@liststack,$listtype,$listlast);

  # variables for marking up optional arguments of \item commands
  my (@itemargs, $itemarg);


  # $_ is an alias for the current argument, i.e. all substitutions below act on the caller's string
  for (@_) {
    # change $'s in comments to something harmless
    1 while s/(%.*)\$/$1DOLLARDIF/mg ;

    # Remove final STOP token
    s/ STOP$//;
    # Replace \RIGHTBRACE in comments by \MBLOCKRIGHTBRACE
    # the only way to get these is as %DIFDELCMD < \RIGHTBRACE construction
    # This essentially marks closing right braces of MATHBLOCK environments, which did not get matched
    # up. This case should be rare, so I just leave this in the diff file output. Not really elegant
    # but can still be dealt with later if it results in problems.
    s/%DIFDELCMD < \\RIGHTBRACE/%DIFDELCMD < \\MBLOCKRIGHTBRACE/g ;
    # Replace \RIGHTBRACE by }
    s/\\RIGHTBRACE/}/g;

    # Check all deleted blocks: where a deleted block contains a matching \begin and
    # \end environment (these will be disabled by a %DIFDELCMD statements), enable
    # these commands again (such that for example displayed math in a deleted equation
    # is properly within math mode).  For math mode environments replace numbered equation
    # environments with their display only variety (so that equation numbers in new file and
    # diff file are identical)

    # Each deleted block is copied into $delblock, modified there and spliced back at the end of
    # the loop body; pos is then set behind the modified block so that the search continues from there
    while ( m/\\DIFdelbegin.*?\\DIFdelend/sg ) {
      ### while ( m/\\DIFdelbegin.*?\\DIFdelend/sg ) {
      ###      print STDERR "DEBUG Match delblock \n||||$&||||\n at ",pos,"\n";
      $cnt=0;
      $len=length($&);
      $begin=pos($_) - $len;
      $delblock=$&;
      ### A much simpler method for math replacement might follow this strategy (can recycle part of the commands below for following
      ### this strategy:
      ###   1. a Insert aux commands \begin{MATHMODE} or \end{MATHMODE} for all deleted commands opening or closing displayed math mode
      ###      b Insert aux commands \begin{MATHARRMODE} or \end{MATHARRMODE} for all deleted commands opening or closing math array mode
      ###   2    Replace MATHMODE and MATHARRMODE by correct pairing if appropriate partner math command is found in text
      ###   3  a Replace remaining \begin{MATHMODE}...\end{MATHMODE} pairs with \begin{$MATHREPL}..\end{$MATHREPL}
      ###      b Replace remaining \begin{MATHARRMODE}...\end{MATHARRMODE} pairs with \begin{$MATHREPL}..\end{$MATHREPL}
      ###   4  Delete all aux command math mode pairs which have simply comments or empty lines between them
      ###   As written this won't actually work!


      ### Most general case: allow all included environments
      ### $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{(\w*\*?)\}\s*?\n)(.*?)(\%DIFDELCMD < \s*\\end\{\2\})/$1\\begin{$2}$AUXCMD\n$3\n\\end{$2}$AUXCMD\n$4/sg;
      ### (.*?[^\n]?)\n? construct is necessary to avoid empty lines in math mode, which result in
      ### an error
      # displayed math environments
      ###0.5: $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHREPL}$AUXCMD\n$1$3\n\\end{$MATHREPL}$AUXCMD\n$4/sg;
      if ($mathmarkup == FINE ) {
        $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}.*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHREPL}$AUXCMD\n$1$3\n\\end{$MATHREPL}$AUXCMD\n$4/sg;
        # also transform the opposite pair \end{displaymath} .. \begin{displaymath} but we have to be careful not to interfere with the results of the transformation in the line directly above
        ### pre-0.42 obsolete version which did not work on eqnarray test $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHENV)\}\s*?\n)(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/$1\\end{$MATHREPL}$AUXCMD\n$3\n\\begin{$MATHREPL}$AUXCMD\n$4/sg;
        ###0.5: $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end{MATHMODE}$AUXCMD\n$1$3\n\\begin{MATHMODE}$AUXCMD\n$4/sg;
        $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}.*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end\{MATHMODE\}$AUXCMD\n$1$3\n\\begin\{MATHMODE\}$AUXCMD\n$4/sg;

        # now look for unpaired %DIFDELCMD < \begin{MATHENV}; if found add \begin{$MATHREPL} and insert \end{$MATHREPL}
        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\begin{$MATHREPL}$AUXCMD\n/sg ) {
          $delblock =~ s/(\\DIFdelend$)/\\end{$MATHREPL}$AUXCMD\n$1/s ;
        }
        # now look for unpaired %DIFDELCMD < \end{MATHENV}; if found add \end{MATHMODE} and insert \begin{MATHMODE}
        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\end\{MATHMODE\}$AUXCMD\n/sg ) {
          $delblock =~ s/(\\DIFdelend$)/\\begin\{MATHMODE\}$AUXCMD\n$1/s ;
        }


        ### pre-0.42 # same as above for special case \[.\] (latex abbreviation for displaymath)
        ### pre-0.42 $delblock=~ s/(\%DIFDELCMD < \s*\\\[\s*?\n())(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\\])/$1\\\[$AUXCMD\n$3\n\\\]$AUXCMD\n$4/sg;
        ### pre-0.42 $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\\]\s*?\n())(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\\[)/$1\\\]$AUXCMD\n$3\n\\\[$AUXCMD\n$4/sg;
        # equation array environment
        ###pre-0.3 $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?\n)(.*?)(\%DIFDELCMD < \s*\\end\{\2\})/$1\\begin{$MATHARRREPL}$AUXCMD\n$3\n\\end{$MATHARRREPL}$AUXCMD\n$4/sg;
        ###0.5 $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHARRREPL}$AUXCMD\n$1$3\n\\end{$MATHARRREPL}$AUXCMD\n$4/sg;
        $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}.*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHARRREPL}$AUXCMD\n$1$3\n\\end{$MATHARRREPL}$AUXCMD\n$4/sg;
        ### pre-0.42 obsolete version which did not work on eqnarray test $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?\n)(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/$1\\end{$MATHARRREPL}$AUXCMD\n$3\n\\begin{$MATHARRREPL}$AUXCMD\n$4/sg;
        $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end{MATHMODE}$AUXCMD\n$1$3\n\\begin{MATHMODE}$AUXCMD\n$4/sg;

        # now look for unpaired %DIFDELCMD < \begin{MATHARRENV}; if found add \begin{$MATHARRREPL} and insert \end{$MATHARRREPL}
        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\begin{$MATHARRREPL}$AUXCMD\n/sg ) {
          $delblock =~ s/(\\DIFdelend$)/\\end{$MATHARRREPL}$AUXCMD\n$1/s ;
        }
        # now look for unpaired %DIFDELCMD < \end{MATHENV}; if found add \end{MATHMODE} and insert \begin{MATHMODE}
        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\end{MATHMODE}$AUXCMD\n/sg ) {
          $delblock =~ s/(\\DIFdelend$)/\\begin{MATHMODE}$AUXCMD\n$1/s ;
        }

        # parse $delblock for deleted and reinstated eqnarray* environments - within those reinstate \\ and & commands
        ###      while ( $delblock =~ m/\\begin{$MATHARRREPL}$AUXCMD\n.*?\n\\end{$MATHARRREPL}$AUXCMD\n/sg ) {
        while ( $delblock =~ m/\\begin\Q{$MATHARRREPL}$AUXCMD\E\n.*?\n\\end\Q{$MATHARRREPL}$AUXCMD\E\n/sg ) {
          ###        print STDERR "DEBUG Match eqarrayblock $& at ",pos,"\n";
          $cnt2=0;
          $len2=length($&);
          $begin2=pos($delblock) - $len2;
          $eqarrayblock=$&;
          # reinstate deleted & and \\ commands
          $eqarrayblock=~ s/(\%DIFDELCMD < \s*(\&|\\\\)\s*?(?:$DELCMDCLOSE|\n))/$1$2$AUXCMD\n/sg ;

          # splice in modified block and continue the search behind it
          substr($delblock,$begin2,$len2)=$eqarrayblock;
          pos($delblock) = $begin2 + length($eqarrayblock);
        }
      } elsif ( $mathmarkup == COARSE || $mathmarkup == WHOLE ) {
        #       Convert MATHBLOCKmath commands to their uncounted numbers (e.g. convert equation -> displaymath
        #       (environments defined in $MATHENV will be replaced by $MATHREPL, and  environments in $MATHARRENV
        #       will be replaced by $MATHARRREPL
        $delblock=~ s/\\MATHBLOCK($MATHENV)\{($pat_n)\}/\\MATHBLOCK$MATHREPL\{$2\}/sg;
        $delblock=~ s/\\MATHBLOCK($MATHARRENV)\{($pat_n)\}/\\MATHBLOCK$MATHARRREPL\{$2\}/sg;
      }
      # Reinstate completely deleted list environments. note that items within the
      # environment will still be commented out.  They will be restored later
      $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($LISTENV)\}\s*?(?:\n|$DELCMDCLOSE))(.*?)(\%DIFDELCMD < \s*\\end\{\2\})/{
        ###   # block within the search; replacement environment
        ###   "$1\\begin{$2}$AUXCMD\n". restore_item_commands($3). "\n\\end{$2}$AUXCMD\n$4";
        "$1\\begin{$2}$AUXCMD\n$3\n\\end{$2}$AUXCMD\n$4";
      }/esg;

      ### $delblock=~ s/\\begin\{$MATHENV}$AUXCMD/\\begin{$MATHREPL}$AUXCMD/g;
      ### $delblock=~ s/\\end\{$MATHENV}$AUXCMD/\\end{$MATHREPL}$AUXCMD/g;
      ### $delblock=~ s/\\begin\{$MATHARRENV}$AUXCMD/\\begin{$MATHARRREPL}$AUXCMD/g;
      ### $delblock=~ s/\\end\{$MATHARRENV}$AUXCMD/\\end{$MATHARRREPL}$AUXCMD/g;

      #    b.where one of the commands matching $COUNTERCMD is used as a DIFAUXCMD, add a statement
      #      subtracting one from the respective counter to keep numbering consistent with new file
      $delblock=~ s/\\($COUNTERCMD)((?:${extraspace}\[$brat_n\]${extraspace}|${extraspace}\{$pat_n\})*\s*${AUXCMD}\n)/\\$1$2\\addtocounter{$1}{-1}${AUXCMD}\n/sg ;

      #    bb. disable active labels within deleted blocks (i.e. those not commented out) (as these are not safe commands, this should normally only
      #        happen within deleted maths blocks
      ### $delblock=~ s/(?<!$DELCMDOPEN)(\\$LABELCMD(?:${extraspace})\{(?:[^{}])*\}[\t ]*)\n?/${DELCMDOPEN}$1${DELCMDCLOSE}/smg ;
      ### previous line caused trouble as by issue #90 I might need to modify this
      $delblock=~ s/^([^%]*)(\\$LABELCMD(?:${extraspace})\{(?:[^{}])*\}[\t ]*)\n?/$1${DELCMDOPEN}$2${DELCMDCLOSE}/smg ;
      ### print STDERR "<<<$delblock>>>\n" if $debug;


      #    c. If in-line math mode contains array environment, enclose the whole environment in \mbox'es
      while ( $delblock =~ m/($math)(\s*)/sg ) {
        #      print STDERR "DEBUG Delblock Match math $& at ",pos,"\n";
        $cnt2=0;
        $len2=length($&);
        $begin2=pos($delblock) - $len2;
        $mathblock="%\n\\mbox{$AUXCMD\n$1\n}$AUXCMD\n";
        # leave math without array environments untouched
        next unless ( $mathblock =~ /ARRAYBLOCK/ or $mathblock =~ m/\{$ARRENV\}/ );
        substr($delblock,$begin2,$len2)=$mathblock;
        pos($delblock) = $begin2 + length($mathblock);
      }
      ### if ($CITE2CMD) {
      ######   ${extraspace}(?:\[$brat0\]${extraspace}){0,2}\{$pat_n\}))  .*?%%%\n
      ###   $delblock=~s/($DELCMDOPEN\s*\\($CITE2CMD)(.*)$DELCMDCLOSE)/
      ###     # Replacement code
      ###     {my ($aux,$all);
      ###      $aux=$all=$1;
      ###      $aux=~s#\n?($DELCMDOPEN|$DELCMDCLOSE)##g;
      ###      $all."$aux$AUXCMD\n";}/sge;
      ### }
      ### # or protect \cite commands with \mbox
      ### if ($CITECMD) {
      ######   $delblock=~s/(\\($CITECMD)${extraspace}(?:\[$brat0\]${extraspace}){0,2}\{$pat_n\})(\s*)/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/msg ;
      ###   $delblock=~s/(\\($CITECMD)${extraspace}(?:<$abrat0>${extraspace})?(?:\[$brat0\]${extraspace}){0,2}\{$pat_n\})(\s*)/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/msg ;
      ### }
      # if MBOXINLINEMATH is set, protect inlined math environments with an extra mbox
      if ( $MBOXINLINEMATH ) {
        # note additional \newline after command is omitted from output if right at the end of deleted block (otherwise a spurious empty line is generated)
        $delblock=~s/($math)(?:[\s\n]*)?/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/sg;
      }
      ###if ( defined($packages{"listings"} and $latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{color\}/)) {
      ###  #     change included verbatim environments
      ###  $delblock =~ s/\\DIFverb\{/\\DIFDIFdelverb\{/g;
      ###  $delblock =~ s/\\DIFlstinline/\\DIFDIFdellstinline/g;
      ###}
      # Mark deleted verbose commands
      $delblock =~ s/(${DELCMDOPEN}\\DIF((?:verb\*?|lstinline(?:\[$brat_n\])?)\{([-\d]*?)\}\s*).*)$/%\n\\DIFDIFdel$2${AUXCMD}\n$1/gm;
      if ( $CUSTOMDIFCMD ) {
        $delblock =~ s/(${DELCMDOPEN}.*)\\($CUSTOMDIFCMD)/$1${DELCMDCLOSE}\\DEL$2/gm;
      }

      # splice in modified delblock
      substr($_,$begin,$len)=$delblock;
      pos = $begin + length($delblock);
    }
    ###writedebugfile($_,'postprocess');

    ### print STDERR "<<<$_>>>\n" if $debug;


    # make the array modification in added blocks
    while ( m/\\DIFaddbegin.*?\\DIFaddend/sg ) {
      $cnt=0;
      $len=length($&);
      $begin=pos($_) - $len;
      $addblock=$&;
      while ( $addblock =~ m/($math)(\s*)/sg ) {
        $cnt2=0;
        $len2=length($&);
        $begin2=pos($addblock) - $len2;
        $mathblock="%\n\\mbox{$AUXCMD\n$1\n}$AUXCMD\n";
        # leave math without array environments untouched
        next unless ( $mathblock =~ /ARRAYBLOCK/ or $mathblock =~ m/\{$ARRENV\}/) ;
        substr($addblock,$begin2,$len2)=$mathblock;
        pos($addblock) = $begin2 + length($mathblock);
      }
      # if MBOXINLINEMATH is set, protect inlined math environments with an extra mbox
      if ( $MBOXINLINEMATH ) {
        ##$addblock=~s/($math)/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/sg;
        $addblock=~s/($math)(?:[\s\n]*)?/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/sg;
      }
      ###if ( defined($packages{"listings"} and $latexdiffpreamble =~ /\\RequirePackage(?:\[$brat0\])?\{color\}/)) {
      # mark added verbatim commands
      $addblock =~ s/\\DIFverb/\\DIFDIFaddverb/g;
      $addblock =~ s/\\DIFlstinline/\\DIFDIFaddlstinline/g;
      if( $CUSTOMDIFCMD ) {
        $addblock =~ s/\\($CUSTOMDIFCMD)/\\ADD$1/g;
      }
      # markup the optional arguments of \item
      $addblock =~ s/(\\$ITEMCMD$extraspace(?:<$abrat0>)?$extraspace)\[($brat_n)\]/
        @itemargs=splitlatex(substr($2,0,length($2)));
        $itemarg="[".join("",marktags("","",$ADDOPEN,$ADDCLOSE,"","",$ADDCOMMENT,\@itemargs))."]";
        "$1$itemarg"/sge; # old substitution: $1\[$ADDOPEN$2$ADDCLOSE\]
      ###}
#     splice in modified addblock
      substr($_,$begin,$len)=$addblock;
      pos = $begin + length($addblock);
    }

    # Go through whole text, and by counting list environment commands, find out when we are within a list environment.
    # Within those restore deleted \item commands
    # (the capturing group in the split pattern keeps the \begin{..} and \end{..} commands as separate
    #  elements of @textparts; commands within %DIFDELCMD comments are excluded by the look-behind assertion)
    @textparts=split /(?<!$DELCMDOPEN)(\\(?:begin|end)\{$LISTENV\})/ ;
    @liststack=();
    @newtextparts=map {
      ### print STDERR ":::::::: $_\n";
      if ( ($listtype) = m/^\\begin\{($LISTENV)\}$/ ) {
        print STDERR "DEBUG: postprocess \\begin{$listtype}\n" if $debug;
        push @liststack,$listtype;
      } elsif ( ($listtype) = m/^\\end\{($LISTENV)\}$/ ) {
        print STDERR "DEBUG: postprocess \\end{$listtype}\n" if $debug;
        if (scalar @liststack > 0) {
          $listlast=pop(@liststack);
          ($listtype eq $listlast) or warn "Invalid nesting of list environments: $listlast environment closed by \\end{$listtype}.";
        } else {
          warn "WARNING: Invalid nesting of list environments: \\end{$listtype} encountered without matching \\begin{$listtype}.\n";
        }
      } else {
        print STDERR "DEBUG: postprocess \@liststack=(",join(",",@liststack),")\n" if $debug;
        if (scalar @liststack > 0 ) {
          # we are within a list environment and should replace all item commands
          $_=restore_item_commands($_);
        }
        # else: we are outside a list environment and do not need to do anything
      }
      $_ } @textparts;  # end of map command
    # replace the main text with the modified version
    $_= join("",@newtextparts);




    # Replace MATHMODE environments from step 1a above by the correct Math environment

    # The next line is complicated.  The negative look-ahead insertion makes sure that no \end{$MATHENV} (or other mathematical
    # environments) are between the \begin{$MATHENV} and \end{MATHMODE} commands. This is necessary as the minimal matching
    # is not globally minimal but only 'locally' (matching is beginning from the left side of the string)
    if ( $mathmarkup == FINE ) {
      1 while s/\\begin\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)}((?:.(?!(?:\\end\{(?:(?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)}|\\begin\{MATHMODE})))*?)\\end\{MATHMODE}/\\begin{$1}$2\\end{$1}/s;
      1 while s/\\begin\{MATHMODE}((?:.(?!\\end\{MATHMODE}))*?)\\end\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)}/\\begin{$2}$1\\end{$2}/s;
      # convert remaining \begin{MATHMODE} \end{MATHMODE} (and not containing & or \\ )into MATHREPL environments
      s/\\begin\{MATHMODE\}((?:(.(?!(?<!\\)\&|\\\\))*)?)\\end\{MATHMODE\}/\\begin{$MATHREPL}$1\\end{$MATHREPL}/sg;
      # others into MATHARRREPL
      s/\\begin\{MATHMODE\}(.*?)\\end\{MATHMODE\}/\\begin{$MATHARRREPL}$1\\end{$MATHARRREPL}/sg;

      # now look for AUXCMD math-mode pairs which have only comments (or empty lines between them), and remove the added commands
      s/\\begin\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)\}$AUXCMD\n((?:\s*%.[^\n]*\n)*)\\end\{\1\}$AUXCMD\n/$2/sg;
    } else {
      #   math modes OFF,WHOLE,COARSE: Convert \MATHBLOCKmath{..} commands back to environments
      s/\\MATHBLOCK($MATHENV|$MATHARRENV|SQUAREBRACKET)\{($pat_n)\}/\\begin{$1}$2\\end{$1}/sg;
      # convert ARRAYBLOCK.. commands back to environments
      s/\\ARRAYBLOCK($ARRENV)\{($pat_n)\}/\\begin{$1}$2\\end{$1}/sg;
      # get rid of the DIFANCHOR markers, first the delete comments, then everywhere
      s/%DIFDELCMD < \\DIFANCHOR(?:MATH|ARR)[BE] (?:\n%DIFDELCMD < )?%%%\n//g ;
      s/\\DIFANCHOR(?:MATH|ARR)[BE] //g;
    }

    #  Convert all PICTUREblock{..} commands back to the appropriate environments
    s/\\PICTUREBLOCK($PICTUREENV)\{($pat_n)\}/\\begin{$1}$2\\end{$1}/sg;
#0.5:    # Remove all mark up within picture environments
#    while ( m/\\begin\{($PICTUREENV)\}.*?\\end\{\1\}/sg ) {
#      $cnt=0;
#      $len=length($&);
#      $begin=pos($_) - $len;
#      $float=$&;
#      $float =~ s/\\DIFaddbegin //g;
#      $float =~ s/\\DIFaddend //g;
#      $float =~ s/\\DIFadd\{($pat_n)\}/$1/g;
#      $float =~ s/\\DIFdelbegin //g;
#      $float =~ s/\\DIFdelend //g;
#      $float =~ s/\\DIFdel\{($pat_n)\}//g;
#      $float =~ s/$DELCMDOPEN.*//g;
#      substr($_,$begin,$len)=$float;
#      pos = $begin + length($float);
#    }
    # Convert DIFadd, DIFdel, DIFFaddbegin , ... into  varieties
    #    within floats (currently recognised float environments: plate,table,figure
    #    plus starred varieties).
    while ( m/\\begin\{($FLOATENV)\}.*?\\end\{\1\}/sg ) {
      $cnt=0;
      $len=length($&);
      $begin=pos($_) - $len;
      $float=$&;
      $float =~ s/\\DIFaddbegin /\\DIFaddbeginFL /g;
      $float =~ s/\\DIFaddend /\\DIFaddendFL /g;
      $float =~ s/\\DIFadd\{/\\DIFaddFL{/g;
      $float =~ s/\\DIFdelbegin /\\DIFdelbeginFL /g;
      $float =~ s/\\DIFdelend /\\DIFdelendFL /g;
      $float =~ s/\\DIFdel\{/\\DIFdelFL{/g;
      # splice in modified float and continue the search behind it
      substr($_,$begin,$len)=$float;
      pos = $begin + length($float);
    }
    ### former location of undo renaming of \begin and \end in comments

    # remove empty DIFCMD < lines
    s/^\Q${DELCMDOPEN}\E\n//msg;

    # Expand hashes of verb and verbatim environments (note negative look behind assertion to not leak out of DIFDELCMD comments
    s/${DELCMDOPEN}\\($VERBATIMENV)\{([-\d]*?)\}/"${DELCMDOPEN}\\begin{${1}}".fromhash(\%verbhash,$2,$DELCMDOPEN)."${DELCMDOPEN}\\end{${1}}"/esg;
    # revert changes to verbatim environments for line diffs (and add code to mark up changes)
    s/(?<!$DELCMDOPEN)\\begin\{($VERBATIMLINEENV)\}(.*?)\\end\{\1\}/"". reverselinecomment($1, $2) .""/esg;
#    # we do the same for deleted environments but additionally reinstate the framing commands
#    s/$DELCMDOPEN\\begin\{($VERBATIMLINEENV)\}$extraspace(?:\[$brat0\])?$DELCMDCLOSE(.*?)$DELCMDOPEN\\end\{\1\}$DELCMDCLOSE/"\\begin{$1}". reverselinecomment($2) . "\\end{$1}"/esg;
##    s/$DELCMDOPEN\\begin\{($VERBATIMLINEENV)\}($extraspace(?:\[$brat0\])?\s*)(?:\n|$DELCMDOPEN)*$DELCMDCLOSE((?:\%$DELCOMMENT$VERBCOMMENT.*?\n)*)($DELCMDOPEN\\end\{\1\}(?:\n|\s|$DELCMDOPEN)*$DELCMDCLOSE)/"SUBSTITUTION: \\begin{$1}$2 INTERIOR: |$3| END: |$4|"/esg;
    s/ # Deleted \begin command of verbatim environment (Captures $1: whole deleted command, $2: environment, $3: optional arguments with white space
       (\Q$DELCMDOPEN\E\\begin\{($VERBATIMLINEENV)\}(\Q$extraspace\E(?:\[$brat_n\])?\s*)(?:\n|\Q$DELCMDOPEN\E)*\Q$DELCMDCLOSE\E)
       # Interior of deleted verbatim environment should consist entirely of delete DIFVRB comments, i.e. match only lines beginning with % DIF < DIFVRB
       # Captures: $4: all lines combined
       ((?:\%\Q$DELCOMMENT$VERBCOMMENT\E[^\n]*?\n)*)
       # Deleted \end command of verbatim environment. Note that the type is forced to match the opening. Captures: $5: Whole deleted environment (previous way this line was written: (\Q$DELCMDOPEN\E\\end\{\2\}(?:\n|\s|\Q$DELCMDOPEN\E)*\Q$DELCMDCLOSE\E)
       (\Q$DELCMDOPEN\E\\end\{\2\})
     / # Substitution part
       $1 # Leave expression as is
       . "$AUXCMD NEXT\n" # Mark the following line as an auxiliary command
       . "" # reinstate the original environment without options
       . reverselinecomment($2, "$3$4") # modify the body to change the markup; reverselinecomment parses for options
       . " $AUXCMD\n" # close the auxiliary environment
       . $5 # and again leave the original deleted closing environment as is
     /esgx; # Modifiers of substitution command
    # where changes have occurred in verbatim environment, change verbatim to DIFverbatim to allow mark-up
    # (I use the presence of optional paramater to verbatim environment as the marker - normal verbatim
    # environment does not take optional arguments)
    s/(?<!$DELCMDOPEN)\\begin\{(verbatim[*]?)\}(\[$brat_n\].*?)\\end\{\1\}/\\begin{DIF$1}$2\\end{DIF$1}/sg;

    # expand the remaining hashes of verbatim environments (those outside of DIFDELCMD comments)
    s/\\($VERBATIMENV)\{([-\d]*?)\}/"\\begin{${1}}".fromhash(\%verbhash,$2)."\\end{${1}}"/esg;


    # remove all \PAR tokens (taking care to properly keep commented out PAR's
    # from introducing uncommented newlines - next line)
    s/(%DIF < )([^\n]*?)\\PAR\n/$1$2\n$1\n/sg;
    # convert PAR commands which are on a line by themselves
    s/\n(\s*?)\\PAR\n/\n\n/sg;
    # convert remaining PAR commands (which are preceded by non-white space characters, usually "}" ($ADDCLOSE)
    s/\\PAR\n/\n\n/sg;

    #  package specific processing:
    if ( defined($packages{"endfloat"})) {
      #endfloat: make sure \begin{figure} and \end{figure} are always
      #      on a line by themselves, similarly for table environment
      print STDERR "endfloat package detected.\n" if $verbose ;
      # eliminate whitespace before and after
      s/^(\s*)(\\(?:end|begin)\{(?:figure|table)\})(\s*?)$/$2/mg;
      # split lines with remaining characters before float environment command
      s/^([^%]+)(\\(?:begin|end)\{(?:figure|table)\})/$1\n$2/mg;
      # split lines with remaining characters after float environment command
      s/^((?:[^%]+)\\(?:begin|end)\{(?:figure|table)\}(?:\[[a-zA-Z]+\])?)(.+)((?:%.*)?)$/$1\n$2$3/mg;
    }

    # Convert '\PERCENTAGE ' back into '\%' (the final question mark catches a special situation where due to a latter pre-processing step the ' ' becomes separated
    s/\\PERCENTAGE ?/\\%/g;
    # Convert '\DOLLAR ' back into '\$'
    s/\\DOLLAR /\\\$/g;

    # undo renaming of the \begin and \end,{,}  and dollars in comments

    # although we only renamed $ in comments to DOLLARDIFF, we might have lost the % in unchanged verbatim blocks, so rename all
    s/DOLLARDIF/\$/g;
# Convert \begin{SQUAREBRACKET} \end{SQUAREBRACKET} into \[ \]
    s/\\end\{SQUAREBRACKET\}/\\\]/sg;
    s/\\begin\{SQUAREBRACKET\}/\\\[/sg;
# 4. Convert \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} into $$ $$
    s/\\begin\{DOLLARDOLLAR\}(.*?)\\end\{DOLLARDOLLAR\}/\$\$$1\$\$/sg;
# 5. Convert  \SUPERSCRIPTNB{n} into ^n  and  \SUPERSCRIPT{nn} into ^{nnn}
    1 while s/\\SUPERSCRIPT(\s*\{($pat_n)\})/^$1/g ;
    1 while s/\\SUPERSCRIPTNB\{(\s*$pat0)\}/^$1/g ;
    # Convert  \SUBSCRIPTNB{n} into _n  and  \SUBSCRIPT{nn} into _{nnn}
    1 while s/\\SUBSCRIPT(\s*\{($pat_n)\})/_$1/g ;
    1 while s/\\SUBSCRIPTNB\{(\s*$pat0)\}/_$1/g ;
    # Convert  \SQRT{n} into \sqrt{n}  and  \SQRTNB{nn} into \sqrt nn
    1 while s/\\SQRT(\s*\{($pat_n)\})/\\sqrt$1/g ;
    1 while s/\\SQRTNB\{(\s*$pat0)\}/\\sqrt$1/g ;

    # undo renaming of { and } in comments
    1 while s/(%.*)\\CRIGHTBRACE (.*)$/$1\}$2/mg ;
    1 while s/(%.*)\\CLEFTBRACE (.*)$/$1\{$2/mg ;


#    Change \QLEFTBRACE, \QRIGHTBRACE to \{,\}
    s/\\QLEFTBRACE /\\\{/sg;
    s/\\QRIGHTBRACE /\\\}/sg;
    s/\\AMPERSAND /\\&/sg;
    # Highlight added inline verbatim commands if possible
    if ( $latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{color\}/ )   {
      # wrap added verb commands with color commands
      s/\\DIFDIFadd((?:verb\*?|lstinline(?:\[$brat_n\])?)\{[-\d]*?\}[\s\n]*)/\{\\color{blue}$AUXCMD\n\\DIF$1%\n\}$AUXCMD\n/sg;
      s/\\DIFDIFdel((?:verb\*?|lstinline(?:\[$brat_n\])?)\{[-\d]*?\}[\s\n]*$AUXCMD)/\{\\color{red}${AUXCMD}\n\\DIF$1\n\}${AUXCMD}/sg;
    } else {
      # currently if colour markup is not used just remove the added mark
      s/\\DIFDIFadd(verb\*?|lstinline)/\\DIF$1/sg;
      s/\\DIFDIFdel((?:verb\*?|lstinline(?:\[$brat_n\])?)\{[-\d]*?\}[\s\n]*$AUXCMD\n)//sg;
    }

# expand \verb and friends inline arguments
    s/\\DIF((?:DIFadd|DIFdel)?(?:verb\*?|lstinline(?:\[$brat_n\])?))\{([-\d]*?)\}/"\\${1}". fromhash(\%verbhash,$2)/esg;
    # add basicstyle color{blue} to added lstinline commands
    # finally add the comment to the ones not having an optional argument before
    ###s/\\DIFaddlstinline(?!\[)/\\lstinline\n[basicstyle=\\color{blue}]$AUXCMD\n/g;

    return;
  }
}

# $out = restore_item_commands($listenviron)
# short helper function for post-process, which restores deleted \item commands in its argument (as DIFAUXCMDs)
#
# Argument: $listenviron - a string; it is copied into $string, so the caller's value is not modified.
# Returns:  the string after the substitution below has been applied to every match.
#
# What the substitution does:
#   * It matches a "%DIFDELCMD < " comment that contains an item command ($ITEMCMD), optionally
#     followed by an angle-bracket argument, a square-bracket argument and further option
#     sequences ($cmdoptseq), and which is terminated by a newline or by $DELCMDCLOSE.
#   * Capture groups: $1 = the whole matched comment, $2 = item command (plus $extraspace),
#     $3 = angle-bracket argument (may be empty), $4 = square-bracket argument including the
#     brackets (may be empty), $5 = trailing option sequences.
#   * The replacement keeps the matched comment ($1) and appends the item command, its
#     angle-bracket argument, the re-marked optional argument, $AUXCMD and a newline.
#     $5 is captured but not emitted again (the code that would do so is commented out).
#   * The replacement part is Perl code (modifier e); the lines starting with a hash sign inside it are
#     ordinary Perl comments.  They must not contain the substitution delimiter.
sub restore_item_commands {
  my ($string)=@_ ;
  # scratch variables shared by all evaluations of the replacement code below
  my ($itemarg,@itemargs);
  $string =~ s/(\%DIFDELCMD < \s*(\\$ITEMCMD$extraspace)((?:<$abrat0>)?$extraspace)((?:\[$brat_n\])?)\s*((?:${cmdoptseq}\s*?)*)(?:\n|$DELCMDCLOSE))/
     # if \item has an []argument, then mark up the argument as deleted)
     if (length($4)>0) {
       # use substr to exclude square brackets at end points
       @itemargs=splitlatex(substr($4,1,length($4)-2));
       $itemarg="[".join("",marktags("","",$DELOPEN,$DELCLOSE,$DELCMDOPEN,$DELCMDCLOSE,$DELCOMMENT,\@itemargs))."]";
     } else {
       $itemarg="";
     }
     "$1$2$3$itemarg$AUXCMD\n";  ###.((length($5)>0) ? "%DIFDELCMD $5 $DELCMDCLOSE\n" : "")
     /sge;
  return($string);
}


# @auxlines=preprocess_preamble($oldpreamble,$newpreamble);
  # pre-process preamble by looking for commands used in \maketitle (title, author, date etc commands)
  # the list of commands is defined in CONTEXT2CMD
  # if found then use a bodydiff to mark up content, and replace the corresponding commands
  # in both preambles by marked up version to 'fool' the linediff (such that only body is marked up).
  # A special case are e.g. author commands being added (or removed)
  # 1. If commands are added, then the entire content is marked up as new, but also the lines are marked as new in the linediff
  # 2.
#    If commands are removed, then the linediff will mark the line as deleted.  The program returns
#    with $auxlines a text to be appended at the end of the preamble, which shows the respective fields as deleted
#
# Arguments: $_[0] old preamble, $_[1] new preamble.  References to the caller's variables are
#            taken, so BOTH preambles are modified in place.
# Returns:   list of auxiliary lines (one per title command that exists only in the old preamble),
#            each terminated with $AUXCMD.  If a title command occurs more than once in either
#            preamble, a warning is printed and an empty list is returned without touching
#            the preambles.
sub preprocess_preamble {
  my ($oldpreambleref,$newpreambleref)=(\$_[0],\$_[1]) ;
  my @auxlines=();
  # Remember to use $$oldpreambleref to refer to oldpreamble
  my ($titlecmd,$titlecmdpat);
  my (@oldtitlecommands,@newtitlecommands );
  # command name => [ complete matched command, optional argument (or undef), mandatory argument ]
  my %oldhash = ();
  my %newhash = ();
  my ($line,$cmd,$optarg,$arg,$optargnew,$optargold,$optargdiff,$argold,$argnew,$argdiff,$auxline);

  my $warnmsgdetail = <<EOF ;
 This should not occur for standard styles, but can occur for some specifiy styles, document classes,
 e.g. journal house styles.
 Workaround: Use --replace-context2cmd option to specifically set those commands, which are not repeated.
EOF

  # reuse context2cmdlist to define the commands to look out for in the preamble
  $titlecmd = "(?:".join("|",@CONTEXT2CMDLIST).")";
  # as context2cmdlist is stored as regex, e.g. ((?-xism:^title$), we need to remove the
  # ^ and $ anchors for reuse in a more complex regex
  $titlecmd =~ s/[\$\^]//g;
  # make sure to not match on comment lines:
  # captures per match: (1) whole command, (2) command name, (3) optional argument, (4) mandatory argument
  $titlecmdpat=qr/^(?:[^%\n]|\\%)*(\\($titlecmd)$extraspace(?:\[($brat_n)\])?(?:\{($pat_n)\}))/ms;
  ###print STDERR "DEBUG:",$titlecmdpat,"\n";
  # in list context each match contributes its four captures, i.e. the lists are flat groups of four
  @oldtitlecommands= ( $$oldpreambleref =~ m/$titlecmdpat/g );
  @newtitlecommands= ( $$newpreambleref =~ m/$titlecmdpat/g );


  while ( @oldtitlecommands ) {
    $line=shift @oldtitlecommands;
    $cmd=shift @oldtitlecommands;
    $optarg=shift @oldtitlecommands;
    $arg=shift @oldtitlecommands;

    if ( defined($oldhash{$cmd})) {
      warn "WARNING: $cmd is used twice in preamble of old file. Reverting to pure line diff mode for preamble.\n";
      print STDERR $warnmsgdetail;
      return;
    }
    $oldhash{$cmd}=[ $line, $optarg, $arg ];
  }
  while ( @newtitlecommands ) {
    $line=shift @newtitlecommands;
    $cmd=shift @newtitlecommands;
    $optarg=shift @newtitlecommands;
    $arg=shift @newtitlecommands;

    if ( defined($newhash{$cmd})) {
      # same "WARNING: " prefix as for the old file, so that both messages can be filtered alike
      warn "WARNING: $cmd is used twice in preamble of new file. Reverting to pure line diff mode for preamble.\n";
      print STDERR $warnmsgdetail;
      return;
    }
    $newhash{$cmd}=[ $line, $optarg, $arg ];
  }

  # Commands present in the new preamble (changed, unchanged or newly added)
  foreach $cmd ( keys %newhash ) {
    # Look the old entry up via exists(): a plain $oldhash{$cmd}->[1] would autovivify an
    # empty entry in %oldhash for commands which only occur in the new preamble.
    my $oldentry   = exists($oldhash{$cmd}) ? $oldhash{$cmd} : [];
    my $oldcmdline = $oldentry->[0];
    my $newcmdline = $newhash{$cmd}->[0];

    $optargnew = defined($newhash{$cmd}->[1]) ? $newhash{$cmd}->[1] : "";
    $optargold = defined($oldentry->[1])      ? $oldentry->[1]      : "";
    $argold    = defined($oldentry->[2])      ? $oldentry->[2]      : "";
    $argnew=$newhash{$cmd}->[2];

    $argdiff="{" . join("",bodydiff($argold,$argnew)) ."}";
    # Replace \RIGHTBRACE by }
    $argdiff =~ s/\\RIGHTBRACE/}/g;

    if ( length $optargnew ) {
      $optargdiff="[".join("",bodydiff($optargold,$optargnew))."]" ;
      # within the optional argument the FL variants of the markup commands are used
      $optargdiff =~ s/\\DIFaddbegin /\\DIFaddbeginFL /g;
      $optargdiff =~ s/\\DIFaddend /\\DIFaddendFL /g;
      $optargdiff =~ s/\\DIFadd\{/\\DIFaddFL{/g;
      $optargdiff =~ s/\\DIFdelbegin /\\DIFdelbeginFL /g;
      $optargdiff =~ s/\\DIFdelend /\\DIFdelendFL /g;
      $optargdiff =~ s/\\DIFdel\{/\\DIFdelFL{/g;
    } else {
      $optargdiff="";
    }
    ### print STDERR "DEBUG s/\\Q$newhash{$cmd}->[0]\\E/\\$cmd$optargdiff$argdiff/s\n";
    # Note: \Q and \E force literal interpretation of what is between them but allow
    #       variable interpolation, such that e.g. \title matches just that and not TAB-itle
    $$newpreambleref=~s/\Q$newcmdline\E/\\$cmd$optargdiff$argdiff/s;
    # replace this in old preamble if necessary (both preambles then contain the identical
    # marked-up command, so the line diff does not report it)
    if ( defined($oldcmdline) ) {
      $$oldpreambleref=~s/\Q$oldcmdline\E/\\$cmd$optargdiff$argdiff/s ;
    }
    ### print STDERR "DEBUG NEW PRE ".$$newpreambleref."\n";
  }

  # Commands which only exist in the old preamble: return them as auxiliary lines
  foreach $cmd ( keys %oldhash ) {
    # if this has already been dealt with above can just skip
    next if defined($newhash{$cmd}) ;
    $argold=$oldhash{$cmd}->[2];
    $argdiff="{" . join("",bodydiff($argold,"")) ."}";
    if ( defined($oldhash{$cmd}->[1])) {
      $optargold=$oldhash{$cmd}->[1];
      $optargdiff="[".join("",bodydiff($optargold,""))."]" ;
      $optargdiff =~ s/\\DIFdelbegin /\\DIFdelbeginFL /g;
      $optargdiff =~ s/\\DIFdelend /\\DIFdelendFL /g;
      $optargdiff =~ s/\\DIFdel\{/\\DIFdelFL{/g;
    } else {
      $optargdiff="";
    }
    $auxline = "\\$cmd$optargdiff$argdiff";
    # append the auxcmd comment to mark the line as added by latexdiff
    $auxline =~s/$/$AUXCMD/sg;
    push @auxlines,$auxline;
  }
  return(@auxlines);
}



# @diffs=linediff(\@seq1, \@seq2)
# mark up lines like this
#%DIF mm-mmdnn
#%DIF < old deleted line(s)
#%DIF -------
#%DIF mmann-nn
#new appended line %DIF >
#%DIF -------
# Changed blocks (deletion followed directly by insertion) get a "%DIF mmcnn" header and a
# "%DIF -------" line between the deleted and the added lines.
# Assumes: traverse_sequence traverses deletions before insertions in changed sequences
#          all line numbers relative to line 0 (first line of real file)
# Arguments: references to the arrays of old and new lines.
# Returns:   the marked-up lines, as a list in list context and as an array reference otherwise.
# NOTE(review): a block of changes that is still pending after the last matching line is
#               appended as is, i.e. without "%DIF ..." header line and closing "%DIF -------".
sub linediff {
  my $seq1 = shift ;
  my $seq2 = shift ;

  my $block = [];            # lines of the change block currently being collected
  my $retseq = [];           # output lines
  my @begin=('','','');      # dummy initialisation; (type 'd'|'a'|'c', index in seq1, index in seq2) of block start
  my $instring ;

  # The callbacks are called with the current indices into seq1 ($_[0]) and seq2 ($_[1])

  # line only in old file: comment it out
  my $discard = sub {
    @begin=('d',$_[0],$_[1]) unless scalar @$block ;
    push(@$block, "%DIF < " . $seq1->[$_[0]]);
  };

  # line only in new file: keep it, tagged with a trailing comment
  my $add = sub {
    if (! scalar @$block) {
      @begin=('a',$_[0],$_[1]) ;
    }
    elsif ( $begin[0] eq 'd' ) {
      # insertion directly after a deletion: the block becomes a 'change'
      $begin[0]='c'; $begin[2]=$_[1];
      push(@$block, "%DIF -------");
    }
    push(@$block, $seq2->[$_[1]] . " %DIF > " );
  };

  # line in both files: flush a pending change block (with header line), then copy the line
  my $match = sub {
    if ( scalar @$block ) {
      if ( $begin[0] eq 'd' && $begin[1]!=$_[0]-1) {
        # several lines deleted
        $instring = sprintf "%%DIF %d-%dd%d",$begin[1],$_[0]-1,$begin[2];
      }
      elsif ( $begin[0] eq 'a' && $begin[2]!=$_[1]-1) {
        # several lines added
        $instring = sprintf "%%DIF %da%d-%d",$begin[1],$begin[2],$_[1]-1;
      }
      elsif ( $begin[0] eq 'c' ) {
        # changed block: single line numbers or ranges on either side
        $instring = sprintf "%%DIF %sc%s",
                            ($begin[1]==$_[0]-1) ? "$begin[1]" : $begin[1]."-".($_[0]-1)  ,
                            ($begin[2]==$_[1]-1) ? "$begin[2]" : $begin[2]."-".($_[1]-1)  ;
      }
      else {
        # single line deleted or added
        $instring = sprintf "%%DIF %d%s%d",$begin[1],$begin[0],$begin[2];
      }
      push @$retseq, $instring,@$block, "%DIF -------" ;
      $block = [];
    }
    push @$retseq, $seq2->[$_[1]]
  };

  # key function: remove multiple spaces (such that insertion or deletion of redundant white space is not reported)
  my $keyfunc = sub { join(" ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );
  push @$retseq, @$block if scalar @$block;

  return wantarray ? @$retseq : $retseq ;
}



# init_regex_arr_data(\@array,"TOKEN INIT")
# scans DATA file handle for the line "%%BEGIN TOKEN INIT",
# then appends each line not beginning with % into array until the line "%%END TOKEN INIT" is found
# This is used for command lists and configuration variables, but the processing is slightly
# different:
# For lists (token contains COMMANDS), each line is compiled as a regular expression extended by
#   beginning (^) and end ($) markers, to require full-string matching
# For configuration variables (and all others), simply an unadorned list of strings is copied
# The DATA handle is rewound to the beginning of the file afterwards, so that the sub can be
# called again for other tokens.  Note that the loop reads into the global $_ .
sub init_regex_arr_data {
  my ($arr,$token)=@_;
  my $copy=0;   # true while between the BEGIN and END line of the requested token
  my ($mode);
  if ($token =~ m/COMMANDS/ ) {
    $mode=0;  # Reading command list
  } else {
    $mode=1;  # Reading configuration variables
  }

  while (<DATA>) {
    if ( m/^%%BEGIN $token\s*$/ ) {
      $copy=1;
      next;
    } elsif ( m/^%%END $token\s*$/ )  {
      last;
    }
    chomp;
    if ( $mode==0 ) {
#      print STDERR "DEBUG init_regex_arr_data regex >$_<\n" if ($debug && $copy);
      push (@$arr,qr/^$_$/) if ( $copy && !/^%/ ) ;
    } elsif ($mode==1) {
      push (@$arr,"$_") if ( $copy && !/^%/ ) ;
    }
  }
  seek DATA,0,0;    # rewind DATA handle to file begin
}


# init_regex_arr_ext(\@array,$arg)
# appends array with regular expressions.
# if arg is a file name, then read in list of regular expressions from that file
# (one expression per line)
# Otherwise treat arg as a comma separated list of regular expressions
sub init_regex_arr_ext {
  my ($arr,$arg)=@_;
  if ( -f $arg ) {
    init_regex_arr_file($arr,$arg);
  } else {
    init_regex_arr_list($arr,$arg);
  }
}

# init_regex_arr_file(\@array,$fname)
# appends array with regular expressions.
# Read in list of regular expressions from $fname
# (one expression per line)
# Empty lines and comment lines (first non-blank character # or %) are skipped.
# Each remaining line is compiled as a regular expression anchored at both ends.
# Dies if the file cannot be opened.
sub init_regex_arr_file {
  my ($arr,$fname)=@_;
  # Three-argument open with a lexical handle: the file name is taken literally (a name
  # beginning with > or ending in | is not interpreted as a mode), and neither a global
  # file handle nor the global $_ are clobbered.
  open(my $fh, '<', $fname) or die ("Couldn't open $fname: $!");
  while (my $entry = <$fh>) {
    chomp $entry;
    next if $entry =~ /^\s*#/ || $entry =~ /^\s*%/ || $entry =~ /^\s*$/ ;
    push (@$arr,qr/^$entry$/);
  }
  close($fh);
}

# init_regex_arr_list(\@array,$arg)
# appends array with regular expressions.
# read from comma separated list of regular expressions ($arg)
# A comma preceded by a backslash does not separate entries; the sequence \, is turned into
# a literal comma of the respective expression.  Each expression is anchored at both ends.
sub init_regex_arr_list {
  my ($arr,$arg)=@_;
  my $regex;
  ### print STDERR "DEBUG init_regex_arr_list arg >$arg<\n" if $debug;
  foreach $regex (split(qr/(?<!\\),/,$arg)) {
    $regex =~ s/\\,/,/g;
    print STDERR "DEBUG init_regex_arr_list regex >$regex<\n" if $debug;
    push (@$arr,qr/^$regex$/);
  }
}


#exetime() returns time since last execution of this command
#exetime(1) resets this time
# The time is taken from times() in scalar context, i.e. it is the user CPU time of this process.
# As $lasttime starts at -1, a first call without a preceding exetime(1) returns the CPU time plus 1.
# The call with a true argument only resets the reference time; its return value is not meant to be used.
my $lasttime=-1;   # global variable for persistence
sub exetime {
  my $reset=0;
  my $retval;
  if ((scalar @_) >=1) {
    $reset=shift;
  }
  if ($reset) {
    $lasttime=times();
  }
  else {
    $retval=times()-$lasttime;
    $lasttime=$lasttime+$retval;
    return($retval);
  }
}


# usage() prints the help text below to STDERR and terminates the program (via die).
# The text is an interpolating here-document: $0 is replaced by the program name and
# backslashes have to be doubled.
sub usage {
  die <<"EOF";
Usage: $0 [options] old.tex new.tex > diff.tex

Compares two latex files and writes tex code to stdout, which has the same format as new.tex but
has all changes relative to old.tex marked up or commented. Note that old.tex and new.tex need to
be real files (not pipes or similar) as they are opened twice.
3855 3856--type=markupstyle 3857-t markupstyle Add code to preamble for selected markup style 3858 Available styles: UNDERLINE CTRADITIONAL TRADITIONAL CFONT FONTSTRIKE INVISIBLE 3859 CHANGEBAR CCHANGEBAR CULINECHBAR CFONTCHBAR BOLD PDFCOMMENT 3860 [ Default: UNDERLINE ] 3861 3862--subtype=markstyle 3863-s markstyle Add code to preamble for selected style for bracketing 3864 commands (e.g. to mark changes in margin) 3865 Available styles: SAFE MARGIN DVIPSCOL COLOR ZLABEL ONLYCHANGEDPAGE (LABEL)* 3866 [ Default: SAFE ] 3867 * LABEL subtype is deprecated 3868 3869--floattype=markstyle 3870-f markstyle Add code to preamble for selected style which 3871 replace standard marking and markup commands within floats 3872 (e.g., marginal remarks cause an error within floats 3873 so marginal marking can be disabled thus) 3874 Available styles: FLOATSAFE IDENTICAL 3875 [ Default: FLOATSAFE ] 3876 3877--encoding=enc 3878-e enc Specify encoding of old.tex and new.tex. Typical encodings are 3879 ascii, utf8, latin1, latin9. A list of available encodings can be 3880 obtained by executing 3881 perl -MEncode -e 'print join ("\\n",Encode->encodings( ":all" )) ;' 3882 [Default encoding is utf8 unless the first few lines of the preamble contain 3883 an invocation "\\usepackage[..]{inputenc} in which case the 3884 encoding chosen by this command is asssumed. Note that ASCII (standard 3885 latex) is a subset of utf8] 3886 3887--preamble=file 3888-p file Insert file at end of preamble instead of auto-generating 3889 preamble. The preamble must define the following commands 3890 \\DIFaddbegin,\\DIFaddend,\\DIFadd{..}, 3891 \\DIFdelbegin,\\DIFdelend,\\DIFdel{..}, 3892 and varieties for use within floats 3893 \\DIFaddbeginFL,\\DIFaddendFL,\\DIFaddFL{..}, 3894 \\DIFdelbeginFL,\\DIFdelendFL,\\DIFdelFL{..} 3895 (If this option is set -t, -s, and -f options 3896 are ignored.) 3897 3898--exclude-safecmd=exclude-file 3899--exclude-safecmd="cmd1,cmd2,..." 
3900-A exclude-file 3901--replace-safecmd=replace-file 3902--append-safecmd=append-file 3903--append-safecmd="cmd1,cmd2,..." 3904-a append-file Exclude from, replace or append to the list of regex 3905 matching commands which are safe to use within the 3906 scope of a \\DIFadd or \\DIFdel command. The file must contain 3907 one Perl-RegEx per line (Comment lines beginning with # or % are 3908 ignored). A literal comma within the comma-separated list must be 3909 escaped thus "\\,", Note that the RegEx needs to match the whole of 3910 the token, i.e., /^regex\$/ is implied and that the initial 3911 "\\" of the command is not included. The --exclude-safecmd 3912 and --append-safecmd options can be combined with the --replace-safecmd 3913 option and can be used repeatedly to add cumulatively to the lists. 3914 3915--exclude-textcmd=exclude-file 3916--exclude-textcmd="cmd1,cmd2,..." 3917-X exclude-file 3918--replace-textcmd=replace-file 3919--append-textcmd=append-file 3920--append-textcmd="cmd1,cmd2,..." 3921-x append-file Exclude from, replace or append to the list of regex 3922 matching commands whose last argument is text. See 3923 entry for --exclude-safecmd directly above for further details. 3924 3925--replace-context1cmd=replace-file 3926--append-context1cmd=append-file 3927--append-context1cmd="cmd1,cmd2,..." 3928 Replace or append to the list of regex matching commands 3929 whose last argument is text but which require a particular 3930 context to work, e.g. \\caption will only work within a figure 3931 or table. These commands behave like text commands, except when 3932 they occur in a deleted section, when they are disabled, but their 3933 argument is shown as deleted text. 3934 3935--replace-context2cmd=replace-file 3936--append-context2cmd=append-file 3937--append-context2cmd="cmd1,cmd2,..." 3938 As corresponding commands for context1. 
The only difference is that 3939 context2 commands are completely disabled in deleted sections, including 3940 their arguments. 3941 context2 commands are also the only commands in the preamble, whose argument will 3942 be processed in word-by-word mode (which only works, if they occur no more than 3943 once in the preamble). 3944 3945--exclude-mboxsafecmd=exclude-file 3946--exclude-mboxsafecmd="cmd1,cmd2,..." 3947--append-mboxsafecmd=append-file 3948--append-mboxsafecmd="cmd1,cmd2,..." 3949 Define safe commands, which additionally need to be protected by encapsulating 3950 in an \\mbox{..}. This is sometimes needed to get around incompatibilities 3951 between external packages and the ulem package, which is used for highlighting 3952 in the default style UNDERLINE as well as CULINECHBAR CFONTSTRIKE 3953 3954 3955 3956--config var1=val1,var2=val2,... 3957-c var1=val1,.. Set configuration variables. 3958-c configfile Available variables: 3959 ARRENV (RegEx) 3960 COUNTERCMD (RegEx) 3961 FLOATENV (RegEx) 3962 ITEMCMD (RegEx) 3963 LISTENV (RegEx) 3964 MATHARRENV (RegEx) 3965 MATHARRREPL (String) 3966 MATHENV (RegEx) 3967 MATHREPL (String) 3968 MINWORDSBLOCK (Integer) 3969 PICTUREENV (RegEx) 3970 SCALEDELGRAPHICS (Float) 3971 VERBATIMENV (RegEx) 3972 VERBATIMLINEENV (RegEx) 3973 CUSTOMDIFCMD (RegEx) 3974 This option can be repeated. 3975 3976--add-to-config varenv1=pattern1,varenv2=pattern2 3977 For configuration variables containing a regular expression (essentially those ending 3978 in ENV, and COUNTERCMD) this provides an alternative way to modify the configuration 3979 variables. Instead of setting the complete pattern, with this option it is possible to add an 3980 alternative pattern. varenv must be one of the variables listed above that take a regular 3981 expression as argument, and pattern is any regular expression (which might need to be 3982 protected from the shell by quotation). 
Several patterns can be added at once by using semi-colons 3983 to separate them, e.g. --add-to-config "LISTENV=myitemize;myenumerate,COUNTERCMD=endnote" 3984 3985--packages=pkg1,pkg2,.. 3986 Tell latexdiff that .tex file is processed with the packages in list 3987 loaded. This is normally not necessary if the .tex file includes the 3988 preamble, as the preamble is automatically scanned for \\usepackage commands. 3989 Use of the --packages option disables automatic scanning, so if for any 3990 reason package specific parsing needs to be switched off, use --packages=none. 3991 The following packages trigger special behaviour: 3992 endfloat hyperref amsmath apacite siunitx cleveref glossaries mhchem chemformula/chemmacros 3993 biblatex 3994 [ Default: scan the preamble for \\usepackage commands to determine 3995 loaded packages.] 3996 3997--show-preamble Print generated or included preamble commands to stdout. 3998 3999--show-safecmd Print list of regex matching and excluding safe commands. 4000 4001--show-textcmd Print list of regex matching and excluding commands with text argument. 4002 4003--show-config Show values of configuration variables 4004 4005--show-all Show all of the above 4006 4007 NB For all --show commands, no old.tex or new.tex file needs to be given, and no 4008 differencing takes place. 4009 4010Other configuration options: 4011 4012--allow-spaces Allow spaces between bracketed or braced arguments to commands 4013 [Default requires arguments to directly follow each other without 4014 intervening spaces] 4015 4016--math-markup=level Determine granularity of markup in displayed math environments: 4017 Possible values for level are (both numerical and text labels are acceptable): 4018 off or 0: suppress markup for math environments. Deleted equations will not 4019 appear in diff file. This mode can be used if all the other modes 4020 cause invalid latex code. 4021 whole or 1: Differencing on the level of whole equations. 
Even trivial changes 4022 to equations cause the whole equation to be marked changed. This 4023 mode can be used if processing in coarse or fine mode results in 4024 invalid latex code. 4025 coarse or 2: Detect changes within equations marked up with a coarse 4026 granularity; changes in equation type (e.g.displaymath to equation) 4027 appear as a change to the complete equation. This mode is recommended 4028 for situations where the content and order of some equations are still 4029 being changed. [Default] 4030 fine or 3: Detect small change in equations and mark up and fine granularity. 4031 This mode is most suitable, if only minor changes to equations are 4032 expected, e.g. correction of typos. 4033 4034--graphics-markup=level Change highlight style for graphics embedded with \\includegraphics commands 4035 Possible values for level: 4036 none,off or 0: no highlighting for figures 4037 new-only or 1: surround newly added or changed figures with a blue frame [Default] 4038 both or 2: highlight new figures with a blue frame and show deleted figures 4039 at reduced scale, and crossed out with a red diagonal cross. Use configuration 4040 variable SCALEDELGRAPHICS to set size of deleted figures. 4041 Note that changes to the optional parameters will make the figure appear as changed 4042 to latexdiff, and this figure will thus be highlighted. 4043 4044--disable-citation-markup 4045--disable-auto-mbox Suppress citation markup and markup of other vulnerable commands in styles 4046 using ulem (UNDERLINE,FONTSTRIKE, CULINECHBAR) 4047 (the two options are identical and are simply aliases) 4048 4049--enable-citation-markup 4050--enforce-auto-mbox Protect citation commands and other vulnerable commands in changed sections 4051 with \\mbox command, i.e. 
use default behaviour for ulem package for other packages 4052 (the two options are identical and are simply aliases) 4053 4054Miscelleneous options 4055 4056--label=label 4057-L label Sets the labels used to describe the old and new files. The first use 4058 of this option sets the label describing the old file and the second 4059 use of the option sets the label for the new file. 4060 [Default: use the filename and modification dates for the label] 4061 4062--no-label Suppress inclusion of old and new file names as comment in output file 4063 4064--visible-label Include old and new filenames (or labels set with --label option) as 4065 visible output 4066 4067--flatten Replace \\input and \\include commands within body by the content 4068 of the files in their argument. If \\includeonly is present in the 4069 preamble, only those files are expanded into the document. However, 4070 no recursion is done, i.e. \\input and \\include commands within 4071 included sections are not expanded. The included files are assumed to 4072 be located in the same directories as the old and new master files, 4073 respectively, making it possible to organise files into old and new directories. 4074 --flatten is applied recursively, so inputted files can contain further 4075 \\input statements. Also handles files included by the import package 4076 (\\import and \\subimport), and \\subfile command. 4077 4078--filter-script=filterscript Run files through this filterscript (full path preferred) before processing. 4079 The filterscript must take STDIN input and output to STDOUT. 4080 When coupled with --flatten, each file will be run through the filter as it is brought in. 4081 4082--ignore-filter-stderr When running with --filter-script, STDERR from the script may cause readability issues. 4083 Turn this flag on to ignore STDERR from the filter script. 4084 4085 4086 4087--help 4088-h Show this help text. 
4089 4090--ignore-warnings Suppress warnings about inconsistencies in length between input 4091 and parsed strings and missing characters. 4092 4093--verbose 4094-V Output various status information to stderr during processing. 4095 Default is to work silently. 4096 4097--version Show version number. 4098 4099Internal options: 4100These options are mostly for automated use by latexdiff-vc. They can be used directly, but 4101the API should be considered less stable than for the other options. 4102 4103--no-links Suppress generation of hyperreferences, used for minimal diffs 4104 (option --only-changes of latexdiff-vc). 4105EOF 4106} 4107 4108=head1 NAME 4109 4110latexdiff - determine and markup differences between two latex files 4111 4112=head1 SYNOPSIS 4113 4114B<latexdiff> [ B<OPTIONS> ] F<old.tex> F<new.tex> > F<diff.tex> 4115 4116=head1 DESCRIPTION 4117 4118Briefly, I<latexdiff> is a utility program to aid in the management of 4119revisions of latex documents. It compares two valid latex files, here 4120called C<old.tex> and C<new.tex>, finds significant differences 4121between them (i.e., ignoring the number of white spaces and position 4122of line breaks), and adds special commands to highlight the 4123differences. Where visual highlighting is not possible, e.g. for changes 4124in the formatting, the differences are 4125nevertheless marked up in the source. Note that old.tex and new.tex need to 4126be real files (not pipes or similar) as they are opened twice (unless C<--encoding> option is used) 4127 4128The program treats the preamble differently from the main document. 4129Differences between the preambles are found using line-based 4130differencing (similarly to the Unix diff command, but ignoring white 4131spaces). A comment, "S<C<%DIF E<gt>>>" is appended to each added line, i.e. a 4132line present in C<new.tex> but not in C<old.tex>. Discarded lines 4133 are deactivated by prepending "S<C<%DIF E<lt>>>". 
Changed blocks are preceded by
comment lines giving information about line numbers in the original files.  Where there are insignificant
differences, the resulting file C<diff.tex> will be similar to
C<new.tex>.  At the end of the preamble, the definitions for I<latexdiff> markup commands are inserted.
In differencing the main body of the text, I<latexdiff> attempts to
satisfy the following guidelines (in order of priority):

=over 3

=item 1

If both C<old.tex> and C<new.tex> are valid LaTeX, then the resulting
C<diff.tex> should also be valid LaTeX. (NB If a few plain TeX commands
are used within C<old.tex> or C<new.tex> then C<diff.tex> is not
guaranteed to work but usually will).

=item 2

Significant differences are determined on the level of
individual words. All significant differences, including differences
between comments should be clearly marked in the resulting source code
C<diff.tex>.

=item 3

If a changed passage contains text or text-producing commands, then
running C<diff.tex> through LaTeX should produce output where added
and discarded passages are highlighted.

=item 4

Where there are insignificant differences, e.g. in the positioning of
line breaks, C<diff.tex> should follow the formatting of C<new.tex>.

=back

For differencing the same algorithm as I<diff> is used but words
instead of lines are compared.  An attempt is made to recognize
blocks which are completely changed such that they can be marked up as a unit.
Comments are differenced line by line
but the number of spaces within comments is ignored. Commands including
all their arguments are generally compared as one unit, i.e., no mark-up
is inserted into the arguments of commands.  However, for a selected
number of commands (for example, C<\caption> and all sectioning
commands) the last argument is known to be text.
This text is 4178split into words and differenced just as ordinary text (use options to 4179show and change the list of text commands, see below). As the 4180algorithm has no detailed knowledge of LaTeX, it assumes all pairs of 4181curly braces immediately following a command (i.e. a sequence of 4182letters beginning with a backslash) are arguments for that command. 4183As a restriction to condition 1 above it is thus necessary to surround 4184all arguments with curly braces, and to not insert 4185extraneous spaces. For example, write 4186 4187 \section{\textem{This is an emphasized section title}} 4188 4189and not 4190 4191 \section {\textem{This is an emphasized section title}} 4192 4193or 4194 4195 \section\textem{This is an emphasized section title} 4196 4197even though all varieties are the same to LaTeX (but see 4198B<--allow-spaces> option which allows the second variety). 4199 4200For environments whose content does not conform to standard LaTeX or 4201where graphical markup does not make sense all markup commands can be 4202removed by setting the PICTUREENV configuration variable, set by 4203default to C<picture> and C<DIFnomarkup> environments; see B<--config> 4204option). The latter environment (C<DIFnomarkup>) can be used to 4205protect parts of the latex file where the markup results in illegal 4206markup. You have to surround the offending passage in both the old and 4207new file by C<\begin{DIFnomarkup}> and C<\end{DIFnomarkup}>. You must 4208define the environment in the preambles of both old and new 4209documents. I prefer to define it as a null-environment, 4210 4211C<\newenvironment{DIFnomarkup}{}{}> 4212 4213but the choice is yours. Any markup within the environment will be 4214removed, and generally everything within the environment will just be 4215taken from the new file. 4216 4217It is also possible to difference files which do not have a preamble. 
4218 In this case, the file is processed in the main document 4219mode, but the definitions of the markup commands are not inserted. 4220 4221All markup commands inserted by I<latexdiff> begin with "C<\DIF>". Added 4222blocks containing words, commands or comments which are in C<new.tex> 4223but not in C<old.tex> are marked by C<\DIFaddbegin> and C<\DIFaddend>. 4224Discarded blocks are marked by C<\DIFdelbegin> and C<\DIFdelend>. 4225Within added blocks all text is highlighted with C<\DIFadd> like this: 4226C<\DIFadd{Added text block}> 4227Selected `safe' commands can be contained in these text blocks as well 4228(use options to show and change the list of safe commands, see below). 4229All other commands as well as braces "{" and "}" are never put within 4230the scope of C<\DIFadd>. Added comments are marked by prepending 4231"S<C<%DIF E<gt> >>". 4232 4233Within deleted blocks text is highlighted with C<\DIFdel>. Deleted 4234comments are marked by prepending "S<C<%DIF E<lt> >>". Non-safe command 4235and curly braces within deleted blocks are commented out with 4236"S<C<%DIFDELCMD E<lt> >>". 4237 4238 4239 4240=head1 OPTIONS 4241 4242=head2 Preamble 4243 4244The following options determine the visual markup style by adding the appropriate 4245command definitions to the preamble. See the end of this section for a description of 4246available styles. 4247 4248=over 4 4249 4250=item B<--type=markupstyle> or 4251B<-t markupstyle> 4252 4253Add code to preamble for selected markup style. This option defines 4254C<\DIFadd> and C<\DIFdel> commands. 4255Available styles: 4256 4257C<UNDERLINE CTRADITIONAL TRADITIONAL CFONT FONTSTRIKE INVISIBLE 4258CHANGEBAR CCHANGEBAR CULINECHBAR CFONTCHBAR BOLD PDFCOMMENT> 4259 4260[ Default: C<UNDERLINE> ] 4261 4262=item B<--subtype=markstyle> or 4263B<-s markstyle> 4264 4265Add code to preamble for selected style for bracketing 4266commands (e.g. to mark changes in margin). 
This option defines
C<\DIFaddbegin>, C<\DIFaddend>, C<\DIFdelbegin> and C<\DIFdelend> commands.
Available styles: C<SAFE MARGIN COLOR DVIPSCOL ZLABEL ONLYCHANGEDPAGE (LABEL)*>

[ Default: C<SAFE> ]
* Subtype C<LABEL> is deprecated

=item B<--floattype=markstyle> or
B<-f markstyle>

Add code to preamble for selected style which
replaces standard marking and markup commands within floats
(e.g., marginal remarks cause an error within floats
so marginal marking can be disabled thus). This option defines all
C<\DIF...FL> commands.
Available styles: C<FLOATSAFE TRADITIONALSAFE IDENTICAL>

[ Default: C<FLOATSAFE> ]

=item B<--encoding=enc> or
B<-e enc>

Specify encoding of old.tex and new.tex. Typical encodings are
C<ascii>, C<utf8>, C<latin1>, C<latin9>.  A list of available encodings can be
obtained by executing

C<perl -MEncode -e 'print join ("\n",Encode->encodings( ":all" )) ;' >

If this option is used, then old.tex, new.tex are only opened once.
[Default encoding is utf8 unless the first few lines of the preamble contain
an invocation C<\usepackage[..]{inputenc}> in which case the
encoding chosen by this command is assumed. Note that ASCII (standard
latex) is a subset of utf8]

=item B<--preamble=file> or
B<-p file>

Insert file at end of preamble instead of generating
preamble.  The preamble must define the following commands
C<\DIFaddbegin, \DIFaddend, \DIFadd{..},
\DIFdelbegin,\DIFdelend,\DIFdel{..},>
and varieties for use within floats
C<\DIFaddbeginFL, \DIFaddendFL, \DIFaddFL{..},
\DIFdelbeginFL, \DIFdelendFL, \DIFdelFL{..}>
(If this option is set B<-t>, B<-s>, and B<-f> options
are ignored.)

=item B<--packages=pkg1,pkg2,..>

Tell latexdiff that .tex file is processed with the packages in list
loaded.
This is normally not necessary if the .tex file includes the 4317preamble, as the preamble is automatically scanned for C<\usepackage> commands. 4318Use of the B<--packages> option disables automatic scanning, so if for any 4319reason package specific parsing needs to be switched off, use B<--packages=none>. 4320The following packages trigger special behaviour: 4321 4322=over 8 4323 4324=item C<amsmath> 4325 4326Configuration variable MATHARRREPL is set to C<align*> (Default: C<eqnarray*>). (Note that many of the 4327amsmath array environments are already recognised by default as such) 4328 4329=item C<endfloat> 4330 4331Ensure that C<\begin{figure}> and C<\end{figure}> always appear by themselves on a line. 4332 4333=item C<hyperref> 4334 4335Change name of C<\DIFadd> and C<\DIFdel> commands to C<\DIFaddtex> and C<\DIFdeltex> and 4336define new C<\DIFadd> and C<\DIFdel> commands, which provide a wrapper for these commands, 4337using them for the text but not for the link defining command (where any markup would cause 4338errors). 4339 4340=item C<apacite>, C<biblatex> 4341 4342Redefine the commands recognised as citation commands. 4343 4344=item C<siunitx> 4345 4346Treat C<\SI> as equivalent to citation commands (i.e. protect with C<\mbox> if markup style uses ulem package). 4347 4348=item C<cleveref> 4349 4350Treat C<\cref,\Cref>, etc as equivalent to citation commands (i.e. protect with C<\mbox> if markup style uses ulem package). 4351 4352=item C<glossaries> 4353 4354Define most of the glossaries commands as safe, protecting them with \mbox'es where needed 4355 4356=item C<mhchem> 4357 4358Treat C<\ce> as a safe command, i.e. it will be highlighted (note that C<\cee> will not be highlighted in equations as this leads to processing errors) 4359 4360=item C<chemformula> or C<chemmacros> 4361 4362Treat C<\ch> as a safe command outside equations, i.e. 
it will be highlighted (note that C<\ch> will not be highlighted in equations as this leads to processing errors) 4363 4364 4365=back 4366 4367[ Default: scan the preamble for C<\usepackage> commands to determine 4368 loaded packages. ] 4369 4370 4371 4372=item B<--show-preamble> 4373 4374Print generated or included preamble commands to stdout. 4375 4376=back 4377 4378=head2 Configuration 4379 4380=over 4 4381 4382=item B<--exclude-safecmd=exclude-file> or 4383B<-A exclude-file> or B<--exclude-safecmd="cmd1,cmd2,..."> 4384 4385=item B<--replace-safecmd=replace-file> 4386 4387=item B<--append-safecmd=append-file> or 4388B<-a append-file> or B<--append-safecmd="cmd1,cmd2,..."> 4389 4390Exclude from, replace or append to the list of regular expressions (RegEx) 4391matching commands which are safe to use within the 4392scope of a C<\DIFadd> or C<\DIFdel> command. The file must contain 4393one Perl-RegEx per line (Comment lines beginning with # or % are 4394ignored). Note that the RegEx needs to match the whole of 4395the token, i.e., /^regex$/ is implied and that the initial 4396"\" of the command is not included. 4397The B<--exclude-safecmd> and B<--append-safecmd> options can be combined with the B<--replace-safecmd> 4398option and can be used repeatedly to add cumulatively to the lists. 4399 B<--exclude-safecmd> 4400and B<--append-safecmd> can also take a comma separated list as input. If a 4401comma for one of the regex is required, escape it thus "\,". In most cases it 4402will be necessary to protect the comma-separated list from the shell by putting 4403it in quotation marks. 
4404 4405=item B<--exclude-textcmd=exclude-file> or 4406B<-X exclude-file> or B<--exclude-textcmd="cmd1,cmd2,..."> 4407 4408=item B<--replace-textcmd=replace-file> 4409 4410=item B<--append-textcmd=append-file> or 4411B<-x append-file> or B<--append-textcmd="cmd1,cmd2,..."> 4412 4413Exclude from, replace or append to the list of regular expressions 4414matching commands whose last argument is text. See 4415entry for B<--exclude-safecmd> directly above for further details. 4416 4417 4418=item B<--replace-context1cmd=replace-file> 4419 4420=item B<--append-context1cmd=append-file> or 4421 4422=item B<--append-context1cmd="cmd1,cmd2,..."> 4423 4424Replace or append to the list of regex matching commands 4425whose last argument is text but which require a particular 4426context to work, e.g. C<\caption> will only work within a figure 4427or table. These commands behave like text commands, except when 4428they occur in a deleted section, when they are disabled, but their 4429argument is shown as deleted text. 4430 4431=item B<--replace-context2cmd=replace-file> 4432 4433=item B<--append-context2cmd=append-file> or 4434 4435=item B<--append-context2cmd="cmd1,cmd2,..."> 4436 4437As corresponding commands for context1. The only difference is that 4438context2 commands are completely disabled in deleted sections, including 4439their arguments. 4440 4441context2 commands are also the only commands in the preamble, whose argument will be processed in 4442word-by-word mode (which only works, if they occur no more than once in the preamble). The algorithm currently cannot cope with repeated context2 commands in the preamble, as they occur e.g. for the C<\author> argument in some journal styles (not in the standard styles, though). 4443If such a repetition is detected, the whole preamble will be processed in line-by-line mode. In such a case, use C<--replace-context2cmd> option to just select the commands, which should be processed and are not used repeatedly in the preamble. 
4444 4445 4446 4447=item B<--exclude-mboxsafecmd=exclude-file> or B<--exclude-mboxsafecmd="cmd1,cmd2,..."> 4448 4449=item B<--append-mboxsafecmd=append-file> or B<--append-mboxsafecmd="cmd1,cmd2,..."> 4450 4451Define safe commands, which additionally need to be protected by encapsulating 4452in an C<\mbox{..}>. This is sometimes needed to get around incompatibilities 4453between external packages and the ulem package, which is used for highlighting 4454in the default style UNDERLINE as well as CULINECHBAR and FONTSTRIKE. 4455 4456 4457 4458 4459 4460=item B<--config var1=val1,var2=val2,...> or B<-c var1=val1,..> 4461 4462=item B<-c configfile> 4463 4464Set configuration variables. The option can be repeated to set different 4465variables (as an alternative to the comma-separated list). 4466Available variables (see below for further explanations): 4467 4468C<ARRENV> (RegEx) 4469 4470C<COUNTERCMD> (RegEx) 4471 4472C<CUSTOMDIFCMD> (RegEx) 4473 4474C<FLOATENV> (RegEx) 4475 4476C<ITEMCMD> (RegEx) 4477 4478C<LISTENV> (RegEx) 4479 4480C<MATHARRENV> (RegEx) 4481 4482C<MATHARRREPL> (String) 4483 4484C<MATHENV> (RegEx) 4485 4486C<MATHREPL> (String) 4487 4488C<MINWORDSBLOCK> (Integer) 4489 4490C<PICTUREENV> (RegEx) 4491 4492C<SCALEDELGRAPHICS> (Float) 4493 4494 4495=item B<--add-to-config varenv1=pattern1,varenv2=pattern2,...> 4496 4497For configuration variables, which are a regular expression (essentially those ending 4498in ENV, COUNTERCMD and CUSTOMDIFCMD, see list above) this option provides an alternative way to modify the configuration 4499variables. Instead of setting the complete pattern, with this option it is possible to add an 4500alternative pattern. C<varenv> must be one of the variables listed above that take a regular 4501expression as argument, and pattern is any regular expression (which might need to be 4502protected from the shell by quotation). Several patterns can be added at once by using semi-colons 4503to separate them, e.g. 
C<--add-to-config "LISTENV=myitemize;myenumerate,COUNTERCMD=endnote"> 4504 4505=item B<--show-safecmd> 4506 4507Print list of RegEx matching and excluding safe commands. 4508 4509=item B<--show-textcmd> 4510 4511Print list of RegEx matching and excluding commands with text argument. 4512 4513=item B<--show-config> 4514 4515Show values of configuration variables. 4516 4517=item B<--show-all> 4518 4519Combine all --show commands. 4520 4521NB For all --show commands, no C<old.tex> or C<new.tex> file needs to be specified, and no 4522differencing takes place. 4523 4524=back 4525 4526=head2 Other configuration options: 4527 4528=over 4 4529 4530=item B<--allow-spaces> 4531 4532Allow spaces between bracketed or braced arguments to commands. Note 4533that this option might have undesirable side effects (unrelated scope 4534might get lumped with preceding commands) so should only be used if the 4535default produces erroneous results. (Default requires arguments to 4536directly follow each other without intervening spaces). 4537 4538=item B<--math-markup=level> 4539 4540Determine granularity of markup in displayed math environments: 4541Possible values for level are (both numerical and text labels are acceptable): 4542 4543C<off> or C<0>: suppress markup for math environments. Deleted equations will not 4544appear in diff file. This mode can be used if all the other modes 4545cause invalid latex code. 4546 4547C<whole> or C<1>: Differencing on the level of whole equations. Even trivial changes 4548to equations cause the whole equation to be marked changed. This 4549mode can be used if processing in coarse or fine mode results in 4550invalid latex code. 4551 4552C<coarse> or C<2>: Detect changes within equations marked up with a coarse 4553granularity; changes in equation type (e.g. displaymath to equation) 4554appear as a change to the complete equation. This mode is recommended 4555for situations where the content and order of some equations are still 4556being changed. 
[Default] 4557 4558C<fine> or C<3>: Detect small changes in equations and mark up at fine granularity. 4559This mode is most suitable, if only minor changes to equations are 4560expected, e.g. correction of typos. 4561 4562=item B<--graphics-markup=level> 4563 4564Change highlight style for graphics embedded with C<\includegraphics> commands. 4565 4566Possible values for level: 4567 4568C<none>, C<off> or C<0>: no highlighting for figures 4569 4570C<new-only> or C<1>: surround newly added or changed figures with a blue frame [Default if graphicx package loaded] 4571 4572C<both> or C<2>: highlight new figures with a blue frame and show deleted figures at reduced 4573scale, and crossed out with a red diagonal cross. Use configuration 4574variable SCALEDELGRAPHICS to set size of deleted figures. 4575 4576Note that changes to the optional parameters will make the figure appear as changed 4577to latexdiff, and this figure will thus be highlighted. 4578 4579=item B<--disable-citation-markup> or B<--disable-auto-mbox> 4580 4581Suppress citation markup and markup of other vulnerable commands in styles 4582using ulem (UNDERLINE, FONTSTRIKE, CULINECHBAR) 4583(the two options are identical and are simply aliases) 4584 4585=item B<--enable-citation-markup> or B<--enforce-auto-mbox> 4586 4587Protect citation commands and other vulnerable commands in changed sections 4588with C<\mbox> command, i.e. use default behaviour for ulem package for other packages 4589(the two options are identical and are simply aliases) 4590 4591=back 4592 4593=head2 Miscellaneous 4594 4595=over 4 4596 4597=item B<--verbose> or B<-V> 4598 4599Output various status information to stderr during processing. 4600Default is to work silently. 4601 4602=item B<--driver=type> 4603 4604Choose driver for changebar package (only relevant for styles using 4605 changebar: CCHANGEBAR CFONTCHBAR CULINECHBAR CHANGEBAR). Possible 4606drivers are listed in changebar manual, e.g. 
pdftex,dvips,dvitops 4607 [Default: dvips] 4608 4609=item B<--ignore-warnings> 4610 4611Suppress warnings about inconsistencies in length between input and 4612parsed strings and missing characters. These warning messages are 4613often related to non-standard latex or latex constructions with a 4614syntax unknown to C<latexdiff> but the resulting difference argument 4615is often fully functional anyway, particularly if the non-standard 4616latex only occurs in parts of the text which have not changed. 4617 4618=item B<--label=label> or 4619B<-L label> 4620 4621Sets the labels used to describe the old and new files. The first use 4622of this option sets the label describing the old file and the second 4623use of the option sets the label for the new file, i.e. set both 4624labels like this C<-L labelold -L labelnew>. 4625[Default: use the filename and modification dates for the label] 4626 4627=item B<--no-label> 4628 4629Suppress inclusion of old and new file names as comment in output file 4630 4631=item B<--visible-label> 4632 4633Include old and new filenames (or labels set with C<--label> option) as 4634visible output. 4635 4636=item B<--flatten> 4637 4638Replace C<\input> and C<\include> commands within body by the content 4639of the files in their argument. If C<\includeonly> is present in the 4640preamble, only those files are expanded into the document. However, 4641no recursion is done, i.e. C<\input> and C<\include> commands within 4642included sections are not expanded. The included files are assumed to 4643 be located in the same directories as the old and new master files, 4644respectively, making it possible to organise files into old and new directories. 4645--flatten is applied recursively, so inputted files can contain further 4646C<\input> statements. Also handles files included by the import package 4647(C<\import> and C<\subimport>), and C<\subfile> command. 
4648 4649Use of this option might result in prohibitive processing times for 4650larger documents, and the resulting difference document 4651no longer reflects the structure of the input documents. 4652 4653=item B<--filter-script=filterscript> 4654 4655Run files through this filterscript (full path preferred) before processing. 4656The filterscript must take STDIN input and output to STDOUT. 4657When coupled with --flatten, each file will be run through the filter as it is brought in. 4658 4659=item B<--ignore-filter-stderr> 4660 4661When running with --filter-script, STDERR from the script may cause readability issues. 4662Turn this flag on to ignore STDERR from the filter script. 4663 4664 4665 4666=item B<--help> or 4667B<-h> 4668 4669Show help text 4670 4671=item B<--version> 4672 4673Show version number 4674 4675=back 4676 4677 4678=head2 Internal options 4679 4680These options are mostly for automated use by latexdiff-vc. They can be used directly, but the API should be considered less stable than for the other options. 4681 4682=over 4 4683 4684=item B<--no-links> 4685 4686Suppress generation of hyperreferences, used for minimal diffs (option --only-changes of latexdiff-vc) 4687 4688=back 4689 4690 4691=head2 Predefined styles 4692 4693=head2 Major types 4694 4695The major type determines the markup of plain text and some selected latex commands outside floats by defining the markup commands C<\DIFadd{...}> and C<\DIFdel{...}>. 4696 4697=over 10 4698 4699=item C<UNDERLINE> 4700 4701Added text is wavy-underlined and blue, discarded text is struck out and red 4702(Requires color and ulem packages). Overstriking does not work in displayed math equations such that deleted parts of equation are underlined, not struck out (this is a shortcoming inherent to the ulem package). 4703 4704=item C<CTRADITIONAL> 4705 4706Added text is blue and set in sans-serif, and a red footnote is created for each discarded 4707piece of text. 
(Requires color package) 4708 4709=item C<TRADITIONAL> 4710 4711Like C<CTRADITIONAL> but without the use of color. 4712 4713=item C<CFONT> 4714 4715Added text is blue and set in sans-serif, and discarded text is red and very small size. 4716 4717=item C<FONTSTRIKE> 4718 4719Added text is set in sans-serif, discarded text small and struck out 4720 4721=item C<CCHANGEBAR> 4722 4723Added text is blue, and discarded text is red. Additionally, the changed text is marked with a bar in the margin (Requires color and changebar packages). 4724 4725=item C<CFONTCHBAR> 4726 4727Like C<CFONT> but with additional changebars (Requires color and changebar packages). 4728 4729=item C<CULINECHBAR> 4730 4731Like C<UNDERLINE> but with additional changebars (Requires color, ulem and changebar packages). 4732 4733=item C<CHANGEBAR> 4734 4735No mark up of text, but mark margins with changebars (Requires changebar package). 4736 4737=item C<INVISIBLE> 4738 4739No visible markup (but generic markup commands will still be inserted). 4740 4741=item C<BOLD> 4742 4743Added text is set in bold face, discarded text is not shown. 4744 4745=item C<PDFCOMMENT> 4746 4747The pdfcomment package is used to underline new text, and mark deletions with a PDF comment. Note that this markup might appear differently or not at all based on the pdf viewer used. The viewer with best support for pdf markup is probably acroread. This style is only recommended if the number of differences is small. 
4748 4749=back 4750 4751=head2 Subtypes 4752 4753The subtype defines the commands that are inserted at the begin and end of added or discarded blocks, irrespectively of whether these blocks contain text or commands (Defined commands: C<\DIFaddbegin, \DIFaddend, \DIFdelbegin, \DIFdelend>) 4754 4755=over 10 4756 4757=item C<SAFE> 4758 4759No additional markup (Recommended choice) 4760 4761=item C<MARGIN> 4762 4763Mark beginning and end of changed blocks with symbols in the margin nearby (using 4764the standard C<\marginpar> command - note that this sometimes moves somewhat 4765from the intended position). 4766 4767=item C<COLOR> 4768 4769An alternative way of marking added passages in blue, and deleted ones in red. 4770(It is recommended to use instead the main types to effect colored markup, 4771although in some cases coloring with dvipscol can be more complete, for example 4772with citation commands). 4773 4774=item C<DVIPSCOL> 4775 4776An alternative way of marking added passages in blue, and deleted ones in red. Note 4777that C<DVIPSCOL> only works with the dvips converter, e.g. not pdflatex. 4778(It is recommended to use instead the main types to effect colored markup, 4779although in some cases coloring with dvipscol can be more complete). 4780 4781 4782=item C<ZLABEL> 4783 4784can be used to highlight only changed pages, but requires post-processing. It is recommended not to call this option manually but use C<latexdiff-vc> with C<--only-changes> option. Alternatively, use the script given within preamble of diff files made using this style. 4785 4786=item C<ONLYCHANGEDPAGE> 4787 4788also highlights changed pages, without the need for post-processing, but might not work reliably if 4789there is floating material (figures, tables). 4790 4791=item C<LABEL> 4792 4793is similar to C<ZLABEL>, but does not need the zref package and works less reliably (deprecated). 
4794 4795=back 4796 4797=head2 Float Types 4798 4799Some of the markup used in the main text might cause problems when used within 4800floats (e.g. figures or tables). For this reason alternative versions of all 4801markup commands are used within floats. The float type defines these alternative commands. 4802 4803=over 10 4804 4805=item C<FLOATSAFE> 4806 4807Use identical markup for text as in the main body, but set all commands marking the begin and end of changed blocks to null-commands. You have to choose this float type if your subtype is C<MARGIN> as C<\marginpar> does not work properly within floats. 4808 4809=item C<TRADITIONALSAFE> 4810 4811Mark additions the same way as in the main text. Deleted environments are marked by angular brackets \[ and \] and the deleted text is set in scriptscript size. This float type should always be used with the C<TRADITIONAL> and C<CTRADITIONAL> markup types as the \footnote command does not work properly in floating environments. 4812 4813=item C<IDENTICAL> 4814 4815Make no difference between the main text and floats. 4816 4817=back 4818 4819 4820=head2 Configuration Variables 4821 4822=over 10 4823 4824=item C<ARRENV> 4825 4826If a match to C<ARRENV> is found within an inline math environment within a deleted or added block, then the inlined math 4827is surrounded by C<\mbox{>...C<}>. This is necessary as underlining does not work within inlined array environments. 4828 4829[ Default: C<ARRENV>=S<C<(?:array|[pbvBV]matrix)> >] 4830 4831=item C<COUNTERCMD> 4832 4833If a command in a deleted block which is also in the textcmd list matches C<COUNTERCMD> then an 4834additional command C<\addtocounter{>F<cntcmd>C<}{-1}>, where F<cntcmd> is the matching command, is appended in the diff file such that the numbering in the diff file remains synchronized with the 4835numbering in the new file. 4836 4837[ Default: C<COUNTERCMD>=C<(?:footnote|part|section|subsection> ... 
4838 4839C<|subsubsection|paragraph|subparagraph)> ] 4840 4841=item C<CUSTOMDIFCMD> 4842 4843This option is for advanced users and allows definition of special versions of commands, which do not work as safe commands. 4844 4845Commands in C<CUSTOMDIFCMD> that occur in added or deleted blocks will be given an ADD or DEL prefix. 4846The prefixed versions of the command must be defined in the preamble, either by putting them 4847in the preamble of at least the new file, or by creating a custom preamble file (Option --preamble). 4848For example the command C<\blindtext> (from package blindtext) does not interact well with underlining, so that 4849for the standard markup type, it is not satisfactory to define it as a safe command. Instead, customised versions 4850without underlining can be defined in the preamble: 4851 4852C<\newcommand{\DELblindtext}{{\color{red}\blindtext}}> 4853 4854C<\newcommand{\ADDblindtext}{{\color{blue}\blindtext}}> 4855 4856and then latexdiff should be invoked with the option C<-c CUSTOMDIFCMD=blindtext>. 4857 4858[ Default: none ] 4859 4860=item C<FLOATENV> 4861 4862Environments whose name matches the regular expression in C<FLOATENV> are 4863considered floats. Within these environments, the I<latexdiff> markup commands 4864are replaced by their FL varieties. 4865 4866[ Default: S<C<(?:figure|table|plate)[\w\d*@]*> >] 4867 4868=item C<ITEMCMD> 4869 4870Commands representing new item line within list environments. 4871 4872[ Default: \C<item> ] 4873 4874=item C<LISTENV> 4875 4876Environments whose name matches the regular expression in C<LISTENV> are list environments. 4877 4878[ Default: S<C<(?:itemize|enumerate|description)> >] 4879 4880=item C<MATHENV>,C<MATHREPL> 4881 4882If both \begin and \end for a math environment (environment name matching C<MATHENV> or \[ and \]) 4883are within the same deleted block, they are replaced by \begin and \end commands for C<MATHREPL> 4884rather than being commented out. 
4885 4886[ Default: C<MATHENV>=S<C<(?:displaymath|equation)> >, C<MATHREPL>=S<C<displaymath> >] 4887 4888=item C<MATHARRENV>,C<MATHARRREPL> 4889 4890as C<MATHENV>,C<MATHREPL> but for equation arrays 4891 4892[ Default: C<MATHARRENV>=S<C<eqnarray\*?> >, C<MATHARRREPL>=S<C<eqnarray> >] 4893 4894=item C<MINWORDSBLOCK> 4895 4896Minimum number of tokens required to form an independent block. This value is 4897used in the algorithm to detect changes of complete blocks by merging identical text parts of less than C<MINWORDSBLOCK> to the preceding added and discarded parts. 4898 4899[ Default: 3 ] 4900 4901=item C<PICTUREENV> 4902 4903Within environments whose name matches the regular expression in C<PICTUREENV> 4904all latexdiff markup is removed (in pathologic cases this might lead to 4905inconsistent markup but this situation should be rare). 4906 4907[ Default: S<C<(?:picture|DIFnomarkup)[\w\d*@]*> >] 4908 4909=item C<SCALEDELGRAPHICS> 4910 4911If C<--graphics-markup=both> is chosen, C<SCALEDELGRAPHICS> is the factor, by which deleted figures will be scaled (i.e. 0.5 implies they are shown at half linear size). 4912 4913[ Default: 0.5 ] 4914 4915=item C<VERBATIMENV> 4916 4917RegEx describing environments like verbatim, whose contents should be taken verbatim. The content of these environments will not be processed in any way: 4918deleted content is commented out, new content is not marked up. 4919 4920[ Default: S<C<comment> > ] 4921 4922=item C<VERBATIMLINEENV> 4923 4924RegEx describing environments like verbatim, whose contents should be taken verbatim. The content of environments described by VERBATIMLINEENV is compared in 4925line mode, and changes are marked up using the listings package. The markup style is set based on the chosen main markup type (Option -t), or on an analysis 4926of the preamble. 4927Note that "listings.sty" must be installed. 
If this file is not found the fallback solution is to 4928treat VERBATIMLINEENV environments exactly the same way as VERBATIMENV environments. 4929 4930[ Default: S<C<(?:verbatim[*]?|lstlisting)> > ] 4931 4932=back 4933 4934=head1 COMMON PROBLEMS AND FAQ 4935 4936=over 10 4937 4938=item Citations result in overfull boxes 4939 4940There is an incompatibility between the C<ulem> package, which C<latexdiff> uses for underlining and striking out in the UNDERLINE style, 4941the default style, and the way citations are generated. In order to be able to mark up citations properly, they are enclosed with an C<\mbox> 4942command. As mboxes cannot be broken across lines, this procedure frequently results in overfull boxes, possibly obscuring the content as it extends beyond the right margin. The same occurs for some other packages (e.g., siunitx). If this is a problem, you have two possibilities. 4943 49441. Use C<CFONT> type markup (option C<-t CFONT>): If this markup is chosen, then changed citations are no longer marked up 4945with the wavy line (additions) or struck out (deletions), but are still highlighted in the appropriate color, and deleted text is shown with a different font. Other styles not using the C<ulem> package will also work. 4946 49472. Choose option C<--disable-citation-markup> which turns off the marking up of citations: deleted citations are no longer shown, and 4948added citations are shown without markup. (This was the default behaviour of latexdiff at versions 0.6 and older) 4949 4950For custom packages you can define the commands which need to be protected by C<\mbox> with C<--append-mboxsafecmd> and C<--exclude-mboxsafecmd> options 4951(submit your lists of commands as feature request at github page to set the default behaviour of future versions, see section 6) 4952 4953=item Changes in complicated mathematical equations result in latex processing errors 4954 4955Try option C<--math-markup=whole>. 
If even that fails, you can turn off mark up for equations with C<--math-markup=off>. 4956 4957=item How can I just show the pages where changes have been made 4958 4959Use options C<-s ZLABEL> (some postprocessing required) or C<-s ONLYCHANGEDPAGE>. C<latexdiff-vc --ps|--pdf> with C<--only-changes> option takes care of 4960the post-processing for you (requires zref package to be installed). 4961 4962=back 4963 4964=head1 BUGS 4965 4966=over 10 4967 4968=item Option allow-spaces not implemented entirely consistently. It breaks 4969the rules that number and type of white space does not matter, as 4970different numbers of inter-argument spaces are treated as significant. 4971 4972=back 4973 4974Please submit bug reports using the issue tracker of the github repository page I<https://github.com/ftilmann/latexdiff.git>, 4975or send them to I<tilmann -- AT -- gfz-potsdam.de>. Include the version number of I<latexdiff> 4976(from comments at the top of the source or use B<--version>). If you come across latex 4977files that are error-free and conform to the specifications set out 4978above, and whose differencing still does not result in error-free 4979latex, please send me those files, ideally edited to only contain the 4980offending passage as long as that still reproduces the problem. If your 4981file relies on non-standard class files, you must include those. I will not 4982look at examples where I have trouble to latex the original files. 4983 4984=head1 SEE ALSO 4985 4986L<latexrevise>, L<latexdiff-vc> 4987 4988=head1 PORTABILITY 4989 4990I<latexdiff> does not make use of external commands and thus should run 4991on any platform supporting Perl 5.6 or higher. If files with encodings 4992other than ASCII or UTF-8 are processed, Perl 5.8 or higher is required. 
4993 4994The standard version of I<latexdiff> requires installation of the Perl package 4995C<Algorithm::Diff> (available from I<www.cpan.org> - 4996I<http://search.cpan.org/~nedkonz/Algorithm-Diff-1.15>) but a stand-alone 4997version, I<latexdiff-so>, which has this package inlined, is available, too. 4998I<latexdiff-fast> requires the I<diff> command to be present. 4999 5000=head1 AUTHOR 5001 5002Version 1.3.0 5003Copyright (C) 2004-2018 Frederik Tilmann 5004 5005This program is free software; you can redistribute it and/or modify 5006it under the terms of the GNU General Public License Version 3 5007 5008Contributors of fixes and additions: V. Kuhlmann, J. Paisley, N. Becker, T. Doerges, K. Huebner, 5009T. Connors, Sebastian Gouezel and many others. 5010Thanks to the many people who sent in bug reports, feature suggestions, and other feedback. 5011 5012=cut 5013 5014__END__ 5015%%BEGIN SAFE COMMANDS 5016% Regex matching commands which can safely be in the 5017% argument of a \DIFadd or \DIFdel command (leave out the \) 5018arabic 5019dashbox 5020emph 5021fbox 5022framebox 5023hspace\*? 
5024math.* 5025makebox 5026mbox 5027pageref 5028ref 5029symbol 5030raisebox 5031rule 5032text.* 5033shortstack 5034usebox 5035dag 5036ddag 5037copyright 5038pounds 5039S 5040P 5041oe 5042OE 5043ae 5044AE 5045aa 5046AA 5047o 5048O 5049l 5050L 5051frac 5052ss 5053sqrt 5054ldots 5055cdots 5056vdots 5057ddots 5058alpha 5059beta 5060gamma 5061delta 5062epsilon 5063varepsilon 5064zeta 5065eta 5066theta 5067vartheta 5068iota 5069kappa 5070lambda 5071mu 5072nu 5073xi 5074pi 5075varpi 5076rho 5077varrho 5078sigma 5079varsigma 5080tau 5081upsilon 5082phi 5083varphi 5084chi 5085psi 5086omega 5087Gamma 5088Delta 5089Theta 5090Lambda 5091Xi 5092Pi 5093Sigma 5094Upsilon 5095Phi 5096Psi 5097Omega 5098ps 5099mp 5100times 5101div 5102ast 5103star 5104circ 5105bullet 5106cdot 5107cap 5108cup 5109uplus 5110sqcap 5111vee 5112wedge 5113setminus 5114wr 5115diamond 5116(?:big)?triangle.* 5117lhd 5118rhd 5119unlhd 5120unrhd 5121oplus 5122ominus 5123otimes 5124oslash 5125odot 5126bigcirc 5127d?dagger 5128amalg 5129leq 5130prec 5131preceq 5132ll 5133(?:sq)?su[bp]set(?:eq)? 5134in 5135vdash 5136geq 5137succ(?:eq)? 5138gg 5139ni 5140dashv 5141equiv 5142sim(?:eq)? 5143asymp 5144approx 5145cong 5146neq 5147doteq 5148propto 5149models 5150perp 5151mid 5152parallel 5153bowtie 5154Join 5155smile 5156frown 5157.*arrow 5158(?:long)?mapsto 5159.*harpoon.* 5160leadsto 5161aleph 5162hbar 5163imath 5164jmath 5165ell 5166wp 5167Re 5168Im 5169mho 5170prime 5171emptyset 5172nabla 5173surd 5174top 5175bot 5176angle 5177forall 5178exists 5179neg 5180flat 5181natural 5182sharp 5183backslash 5184partial 5185infty 5186Box 5187Diamond 5188triangle 5189clubsuit 5190diamondsuit 5191heartsuit 5192spadesuit 5193sum 5194prod 5195coprod 5196int 5197oint 5198big(?:sq)?c[au]p 5199bigvee 5200bigwedge 5201bigodot 5202bigotimes 5203bigoplus 5204biguplus 5205(?:arc)?(?:cos|sin|tan|cot)h? 
5206csc 5207arg 5208deg 5209det 5210dim 5211exp 5212gcd 5213hom 5214inf 5215ker 5216lg 5217lim 5218liminf 5219limsup 5220ln 5221log 5222max 5223min 5224Pr 5225sec 5226sup 5227bibfield 5228bibinfo 5229[Hclbkdruvt] 5230[`'^"~=.] 5231_ 5232AMPERSAND 5233(SUPER|SUB)SCRIPTNB 5234(SUPER|SUB)SCRIPT 5235SQRT 5236SQRTNB 5237PERCENTAGE 5238DOLLAR 5239%%END SAFE COMMANDS 5240 5241%%BEGIN TEXT COMMANDS 5242% Regex matching commands with a text argument (leave out the \) 5243addcontents.* 5244cc 5245closing 5246chapter 5247dashbox 5248emph 5249encl 5250fbox 5251framebox 5252footnote 5253footnotetext 5254framebox 5255href 5256intertext 5257part 5258(sub){0,2}section\*? 5259(sub)?paragraph\*? 5260makebox 5261mbox 5262opening 5263parbox 5264raisebox 5265savebox 5266sbox 5267shortintertext 5268shortstack 5269sidenote 5270signature 5271text.* 5272value 5273underline 5274sqrt 5275(SUPER|SUB)SCRIPT 5276%%END TEXT COMMANDS 5277 5278%%BEGIN CONTEXT1 COMMANDS 5279% Regex matching commands with a text argument (leave out the \), which will fail out of context. These commands behave like text commands, except when they occur in a deleted section, where they are disabled, but their argument is shown as deleted text. 5280caption 5281subcaption 5282%%END CONTEXT1 COMMANDS 5283 5284%%BEGIN CONTEXT2 COMMANDS 5285% Regex matching commands with a text argument (leave out the \), which will fail out of context. As corresponding commands for context1. The only difference is that context2 commands are completely disabled in deleted sections, including their arguments. 
title
author
date
institute
%%END CONTEXT2 COMMANDS

%% CONFIGURATION variable defaults
%%BEGIN LISTENV CONFIG
itemize
description
enumerate
%%END LISTENV CONFIG

%%BEGIN FLOATENV CONFIG
figure[\w\d*@]*
table[\w\d*@]*
plate[\w\d*@]*
%%END FLOATENV CONFIG

%%BEGIN PICTUREENV CONFIG
picture[\w\d*@]*
tikzpicture[\w\d*@]*
DIFnomarkup
%%END PICTUREENV CONFIG

%%BEGIN MATHENV CONFIG
equation[*]?
displaymath
DOLLARDOLLAR
%%END MATHENV CONFIG

%%BEGIN MATHARRENV CONFIG
eqnarray[*]?
align[*]?
alignat[*]?
gather[*]?
multline[*]?
flalign[*]?
%%END MATHARRENV CONFIG

%%BEGIN ARRENV CONFIG
aligned
gathered
array
[pbvBV]?matrix
smallmatrix
cases
split
%%END ARRENV CONFIG

%%BEGIN COUNTERCMD CONFIG
footnote
part
chapter
section
subsection
subsubsection
paragraph
subparagraph
%%END COUNTERCMD CONFIG

%%BEGIN VERBATIMENV CONFIG
comment
%%END VERBATIMENV CONFIG

%%BEGIN VERBATIMLINEENV CONFIG
lstlisting
verbatim[*]?
5354%%END VERBATIMLINEENV CONFIG 5355 5356%%BEGIN CUSTOMDIFCMD CONFIG 5357%%END CUSTOMDIFCMD CONFIG 5358 5359%%% TYPES (Commands for highlighting changed blocks) 5360 5361%DIF UNDERLINE PREAMBLE 5362\RequirePackage[normalem]{ulem} 5363\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} 5364\providecommand{\DIFadd}[1]{{\protect\color{blue}\uwave{#1}}} 5365\providecommand{\DIFdel}[1]{{\protect\color{red}\sout{#1}}} 5366%DIF END UNDERLINE PREAMBLE 5367 5368%DIF CTRADITIONAL PREAMBLE 5369\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} 5370\RequirePackage[stable]{footmisc} 5371\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf} 5372\providecommand{\DIFadd}[1]{{\protect\color{blue} \sf #1}} 5373\providecommand{\DIFdel}[1]{{\protect\color{red} [..\footnote{removed: #1} ]}} 5374%DIF END CTRADITIONAL PREAMBLE 5375 5376%DIF TRADITIONAL PREAMBLE 5377\RequirePackage[stable]{footmisc} 5378\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf} 5379\providecommand{\DIFadd}[1]{{\sf #1}} 5380\providecommand{\DIFdel}[1]{{[..\footnote{removed: #1} ]}} 5381%DIF END TRADITIONAL PREAMBLE 5382 5383%DIF CFONT PREAMBLE 5384\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} 5385\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf} 5386\providecommand{\DIFadd}[1]{{\protect\color{blue} \sf #1}} 5387\providecommand{\DIFdel}[1]{{\protect\color{red} \scriptsize #1}} 5388%DIF END CFONT PREAMBLE 5389 5390%DIF FONTSTRIKE PREAMBLE 5391\RequirePackage[normalem]{ulem} 5392\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf} 5393\providecommand{\DIFadd}[1]{{\sf #1}} 5394\providecommand{\DIFdel}[1]{{\footnotesize \sout{#1}}} 5395%DIF END FONTSTRIKE PREAMBLE 5396 5397%DIF CCHANGEBAR PREAMBLE 5398\RequirePackage[dvips]{changebar} 5399\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} 
5400\providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}#1}\protect\cbend} 5401\providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}#1}\protect\cbdelete} 5402%DIF END CCHANGEBAR PREAMBLE 5403 5404%DIF CFONTCHBAR PREAMBLE 5405\RequirePackage[dvips]{changebar} 5406\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} 5407\providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}\sf #1}\protect\cbend} 5408\providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}\scriptsize #1}\protect\cbdelete} 5409%DIF END CFONTCHBAR PREAMBLE 5410 5411%DIF CULINECHBAR PREAMBLE 5412\RequirePackage[normalem]{ulem} 5413\RequirePackage[dvips]{changebar} 5414\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} 5415\providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}\uwave{#1}}\protect\cbend} 5416\providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}\sout{#1}}\protect\cbdelete} 5417%DIF END CULINECHBAR PREAMBLE 5418 5419%DIF CHANGEBAR PREAMBLE 5420\RequirePackage[dvips]{changebar} 5421\providecommand{\DIFadd}[1]{\protect\cbstart{#1}\protect\cbend} 5422\providecommand{\DIFdel}[1]{\protect\cbdelete} 5423%DIF END CHANGEBAR PREAMBLE 5424 5425%DIF INVISIBLE PREAMBLE 5426\providecommand{\DIFadd}[1]{#1} 5427\providecommand{\DIFdel}[1]{} 5428%DIF END INVISIBLE PREAMBLE 5429 5430%DIF BOLD PREAMBLE 5431\DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf} 5432\providecommand{\DIFadd}[1]{{\bf #1}} 5433\providecommand{\DIFdel}[1]{} 5434%DIF END BOLD PREAMBLE 5435 5436%DIF PDFCOMMENT PREAMBLE 5437\RequirePackage{pdfcomment} %DIF PREAMBLE 5438\providecommand{\DIFadd}[1]{\pdfmarkupcomment[author=ADD:,markup=Underline]{#1}{}} 5439\providecommand{\DIFdel}[1]{\pdfcomment[icon=Insert,author=DEL:,hspace=12pt]{#1}} 5440%DIF END PDFCOMMENT PREAMBLE 5441 5442%% SUBTYPES (Markers for beginning and end of changed blocks) 5443 5444%DIF SAFE PREAMBLE 5445\providecommand{\DIFaddbegin}{} 
5446\providecommand{\DIFaddend}{} 5447\providecommand{\DIFdelbegin}{} 5448\providecommand{\DIFdelend}{} 5449\providecommand{\DIFmodbegin}{} 5450\providecommand{\DIFmodend}{} 5451%DIF END SAFE PREAMBLE 5452 5453%DIF MARGIN PREAMBLE 5454\providecommand{\DIFaddbegin}{\protect\marginpar{a[}} 5455\providecommand{\DIFaddend}{\protect\marginpar{]}} 5456\providecommand{\DIFdelbegin}{\protect\marginpar{d[}} 5457\providecommand{\DIFdelend}{\protect\marginpar{]}} 5458\providecommand{\DIFmodbegin}{\protect\marginpar{m[}} 5459\providecommand{\DIFmodend}{\protect\marginpar{]}} 5460%DIF END MARGIN PREAMBLE 5461 5462%DIF DVIPSCOL PREAMBLE 5463%Note: only works with dvips converter 5464\RequirePackage{color} 5465\RequirePackage{dvipscol} 5466\providecommand{\DIFaddbegin}{\protect\nogroupcolor{blue}} 5467\providecommand{\DIFaddend}{\protect\nogroupcolor{black}} 5468\providecommand{\DIFdelbegin}{\protect\nogroupcolor{red}} 5469\providecommand{\DIFdelend}{\protect\nogroupcolor{black}} 5470\providecommand{\DIFmodbegin}{} 5471\providecommand{\DIFmodend}{} 5472%DIF END DVIPSCOL PREAMBLE 5473 5474%DIF COLOR PREAMBLE 5475\RequirePackage{color} 5476\providecommand{\DIFaddbegin}{\protect\color{blue}} 5477\providecommand{\DIFaddend}{\protect\color{black}} 5478\providecommand{\DIFdelbegin}{\protect\color{red}} 5479\providecommand{\DIFdelend}{\protect\color{black}} 5480\providecommand{\DIFmodbegin}{} 5481\providecommand{\DIFmodend}{} 5482%DIF END COLOR PREAMBLE 5483 5484%DIF LABEL PREAMBLE 5485% To show only pages with changes (pdf) (external program pdftk needs to be installed) 5486% (only works for simple documents with non-repeated page numbers, otherwise use ZLABEL) 5487% pdflatex diff.tex 5488% pdflatex diff.tex 5489%pdftk diff.pdf cat \ 5490%`perl -lne '\ 5491% if (m/\\newlabel{DIFchg[b](\d*)}{{.*}{(.*)}}/) { $start{$1}=$2; print $2}\ 5492% if (m/\\newlabel{DIFchg[e](\d*)}{{.*}{(.*)}}/) { \ 5493% if (defined($start{$1})) { \ 5494% for ($j=$start{$1}; $j<=$2; $j++) {print "$j";}\ 5495% } 
else { \ 5496% print "$2"\ 5497% }\ 5498% }' diff.aux \ 5499% | uniq \ 5500% | tr \\n ' '` \ 5501% output diff-changedpages.pdf 5502% To show only pages with changes (dvips/dvipdf) 5503% dvips -pp `\ 5504% [ put here the perl script from above] 5505% | uniq | tr -s \\n ','` 5506\typeout{Check comments in preamble of output for instructions how to show only pages where changes have been made} 5507\newcount\DIFcounterb 5508\global\DIFcounterb 0\relax 5509\newcount\DIFcountere 5510\global\DIFcountere 0\relax 5511\providecommand{\DIFaddbegin}{\global\advance\DIFcounterb 1\relax\label{DIFchgb\the\DIFcounterb}} 5512\providecommand{\DIFaddend}{\global\advance\DIFcountere 1\relax\label{DIFchge\the\DIFcountere}} 5513\providecommand{\DIFdelbegin}{\global\advance\DIFcounterb 1\relax\label{DIFchgb\the\DIFcounterb}} 5514\providecommand{\DIFdelend}{\global\advance\DIFcountere 1\relax\label{DIFchge\the\DIFcountere}} 5515\providecommand{\DIFmodbegin}{\global\advance\DIFcounterb 1\relax\label{DIFchgb\the\DIFcounterb}} 5516\providecommand{\DIFmodend}{\global\advance\DIFcountere 1\relax\label{DIFchge\the\DIFcountere}} 5517%DIF END LABEL PREAMBLE 5518 5519%DIF ZLABEL PREAMBLE 5520% To show only pages with changes (pdf) (external program pdftk needs to be installed) 5521% (uses zref for reference to absolute page numbers) 5522% pdflatex diff.tex 5523% pdflatex diff.tex 5524%pdftk diff.pdf cat \ 5525%`perl -lne 'if (m/\\zref\@newlabel{DIFchgb(\d*)}{.*\\abspage{(\d*)}}/ ) { $start{$1}=$2; print $2 } \ 5526% if (m/\\zref\@newlabel{DIFchge(\d*)}{.*\\abspage{(\d*)}}/) { \ 5527% if (defined($start{$1})) { \ 5528% for ($j=$start{$1}; $j<=$2; $j++) {print "$j";}\ 5529% } else { \ 5530% print "$2"\ 5531% }\ 5532% }' diff.aux \ 5533% | uniq \ 5534% | tr \\n ' '` \ 5535% output diff-changedpages.pdf 5536% To show only pages with changes (dvips/dvipdf) 5537% latex diff.tex 5538% latex diff.tex 5539% dvips -pp `perl -lne 'if (m/\\newlabel{DIFchg[be]\d*}{{.*}{(.*)}}/) { print $1 }' diff.aux | uniq | 
tr -s \\n ','` diff.dvi 5540\typeout{Check comments in preamble of output for instructions how to show only pages where changes have been made} 5541\usepackage[user,abspage]{zref} 5542\newcount\DIFcounterb 5543\global\DIFcounterb 0\relax 5544\newcount\DIFcountere 5545\global\DIFcountere 0\relax 5546\providecommand{\DIFaddbegin}{\global\advance\DIFcounterb 1\relax\zlabel{DIFchgb\the\DIFcounterb}} 5547\providecommand{\DIFaddend}{\global\advance\DIFcountere 1\relax\zlabel{DIFchge\the\DIFcountere}} 5548\providecommand{\DIFdelbegin}{\global\advance\DIFcounterb 1\relax\zlabel{DIFchgb\the\DIFcounterb}} 5549\providecommand{\DIFdelend}{\global\advance\DIFcountere 1\relax\zlabel{DIFchge\the\DIFcountere}} 5550\providecommand{\DIFmodbegin}{\global\advance\DIFcounterb 1\relax\zlabel{DIFchgb\the\DIFcounterb}} 5551\providecommand{\DIFmodend}{\global\advance\DIFcountere 1\relax\zlabel{DIFchge\the\DIFcountere}} 5552%DIF END ZLABEL PREAMBLE 5553 5554%DIF ONLYCHANGEDPAGE PREAMBLE 5555\RequirePackage{atbegshi} 5556\RequirePackage{etoolbox} 5557\RequirePackage{zref} 5558% redefine label command to write immediately to aux file - page references will be lost 5559\makeatletter \let\oldlabel\label% Store \label 5560\renewcommand{\label}[1]{% Update \label to write to the .aux immediately 5561\zref@wrapper@immediate{\oldlabel{#1}}} 5562\makeatother 5563\newbool{DIFkeeppage} 5564\newbool{DIFchange} 5565\boolfalse{DIFkeeppage} 5566\boolfalse{DIFchange} 5567\AtBeginShipout{% 5568 \ifbool{DIFkeeppage} 5569 {\global\boolfalse{DIFkeeppage}} % True DIFkeeppage 5570 {\ifbool{DIFchange}{\global\boolfalse{DIFkeeppage}}{\global\boolfalse{DIFkeeppage}\AtBeginShipoutDiscard}} % False DIFkeeppage 5571} 5572\providecommand{\DIFaddbegin}{\global\booltrue{DIFkeeppage}\global\booltrue{DIFchange}} 5573\providecommand{\DIFaddend}{\global\booltrue{DIFkeeppage}\global\boolfalse{DIFchange}} 5574\providecommand{\DIFdelbegin}{\global\booltrue{DIFkeeppage}\global\booltrue{DIFchange}} 
\providecommand{\DIFdelend}{\global\booltrue{DIFkeeppage}\global\boolfalse{DIFchange}}
\providecommand{\DIFmodbegin}{\global\booltrue{DIFkeeppage}\global\booltrue{DIFchange}}
\providecommand{\DIFmodend}{\global\booltrue{DIFkeeppage}\global\boolfalse{DIFchange}}
%DIF END ONLYCHANGEDPAGE PREAMBLE

%% FLOAT TYPES

%DIF FLOATSAFE PREAMBLE
\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
\providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
\providecommand{\DIFaddbeginFL}{}
\providecommand{\DIFaddendFL}{}
\providecommand{\DIFdelbeginFL}{}
\providecommand{\DIFdelendFL}{}
%DIF END FLOATSAFE PREAMBLE

%DIF IDENTICAL PREAMBLE
\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
\providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
\providecommand{\DIFaddbeginFL}{\DIFaddbegin}
\providecommand{\DIFaddendFL}{\DIFaddend}
\providecommand{\DIFdelbeginFL}{\DIFdelbegin}
\providecommand{\DIFdelendFL}{\DIFdelend}
%DIF END IDENTICAL PREAMBLE

%DIF TRADITIONALSAFE PREAMBLE
% providecommand color to make this work for TRADITIONAL and CTRADITIONAL
\providecommand{\color}[1]{}
\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
\providecommand{\DIFdelFL}[1]{{\protect\color{red}[..{\scriptsize {removed: #1}} ]}}
\providecommand{\DIFaddbeginFL}{}
\providecommand{\DIFaddendFL}{}
\providecommand{\DIFdelbeginFL}{}
\providecommand{\DIFdelendFL}{}
%DIF END TRADITIONALSAFE PREAMBLE

% see:
% http://tex.stackexchange.com/questions/47351/can-i-redefine-a-command-to-contain-itself

%DIF HIGHLIGHTGRAPHICS PREAMBLE
\RequirePackage{settobox}
\RequirePackage{letltxmacro}
\newsavebox{\DIFdelgraphicsbox}
\newlength{\DIFdelgraphicswidth}
\newlength{\DIFdelgraphicsheight}
% store original definition of \includegraphics
\LetLtxMacro{\DIFOincludegraphics}{\includegraphics}
\newcommand{\DIFaddincludegraphics}[2][]{{\color{blue}\fbox{\DIFOincludegraphics[#1]{#2}}}}
\newcommand{\DIFdelincludegraphics}[2][]{%
\sbox{\DIFdelgraphicsbox}{\DIFOincludegraphics[#1]{#2}}%
\settoboxwidth{\DIFdelgraphicswidth}{\DIFdelgraphicsbox}
\settoboxtotalheight{\DIFdelgraphicsheight}{\DIFdelgraphicsbox}
\scalebox{\DIFscaledelfig}{%
\parbox[b]{\DIFdelgraphicswidth}{\usebox{\DIFdelgraphicsbox}\\[-\baselineskip] \rule{\DIFdelgraphicswidth}{0em}}\llap{\resizebox{\DIFdelgraphicswidth}{\DIFdelgraphicsheight}{%
\setlength{\unitlength}{\DIFdelgraphicswidth}%
\begin{picture}(1,1)%
\thicklines\linethickness{2pt}
{\color[rgb]{1,0,0}\put(0,0){\framebox(1,1){}}}%
{\color[rgb]{1,0,0}\put(0,0){\line( 1,1){1}}}%
{\color[rgb]{1,0,0}\put(0,1){\line(1,-1){1}}}%
\end{picture}%
}\hspace*{3pt}}}
}
\LetLtxMacro{\DIFOaddbegin}{\DIFaddbegin}
\LetLtxMacro{\DIFOaddend}{\DIFaddend}
\LetLtxMacro{\DIFOdelbegin}{\DIFdelbegin}
\LetLtxMacro{\DIFOdelend}{\DIFdelend}
\DeclareRobustCommand{\DIFaddbegin}{\DIFOaddbegin \let\includegraphics\DIFaddincludegraphics}
\DeclareRobustCommand{\DIFaddend}{\DIFOaddend \let\includegraphics\DIFOincludegraphics}
\DeclareRobustCommand{\DIFdelbegin}{\DIFOdelbegin \let\includegraphics\DIFdelincludegraphics}
\DeclareRobustCommand{\DIFdelend}{\DIFOdelend \let\includegraphics\DIFOincludegraphics}
\LetLtxMacro{\DIFOaddbeginFL}{\DIFaddbeginFL}
\LetLtxMacro{\DIFOaddendFL}{\DIFaddendFL}
\LetLtxMacro{\DIFOdelbeginFL}{\DIFdelbeginFL}
\LetLtxMacro{\DIFOdelendFL}{\DIFdelendFL}
\DeclareRobustCommand{\DIFaddbeginFL}{\DIFOaddbeginFL \let\includegraphics\DIFaddincludegraphics}
\DeclareRobustCommand{\DIFaddendFL}{\DIFOaddendFL \let\includegraphics\DIFOincludegraphics}
\DeclareRobustCommand{\DIFdelbeginFL}{\DIFOdelbeginFL \let\includegraphics\DIFdelincludegraphics}
\DeclareRobustCommand{\DIFdelendFL}{\DIFOdelendFL \let\includegraphics\DIFOincludegraphics}
%DIF END HIGHLIGHTGRAPHICS PREAMBLE

%% SPECIAL PACKAGE PREAMBLE COMMANDS

5658% Standard \DIFadd and \DIFdel are redefined as \DIFaddtex and \DIFdeltex 5659% when hyperref package is included. 5660%DIF HYPERREF PREAMBLE 5661\providecommand{\DIFadd}[1]{\texorpdfstring{\DIFaddtex{#1}}{#1}} 5662\providecommand{\DIFdel}[1]{\texorpdfstring{\DIFdeltex{#1}}{}} 5663%DIF END HYPERREF PREAMBLE 5664 5665%DIF LISTINGS PREAMBLE 5666\RequirePackage{listings} 5667\RequirePackage{color} 5668\lstdefinelanguage{DIFcode}{ 5669 % note that the definitions in the following two lines are overwritten dependent on the markup type selected %DIFCODE TEMPLATE 5670 morecomment=[il]{\%DIF\ <\ }, %DIFCODE TEMPLATE 5671 moredelim=[il][\bfseries]{\%DIF\ >\ } %DIFCODE TEMPLATE 5672} 5673\lstdefinestyle{DIFverbatimstyle}{ 5674 language=DIFcode, 5675 basicstyle=\ttfamily, 5676 columns=fullflexible, 5677 keepspaces=true 5678} 5679\lstnewenvironment{DIFverbatim}{\lstset{style=DIFverbatimstyle}}{} 5680\lstnewenvironment{DIFverbatim*}{\lstset{style=DIFverbatimstyle,showspaces=true}}{} 5681%DIF END LISTINGS PREAMBLE 5682 5683%DIF DIFCODE_UNDERLINE 5684 moredelim=[il][\color{red}\sout]{\%DIF\ <\ }, 5685 moredelim=[il][\color{blue}\uwave]{\%DIF\ >\ } 5686%DIF END DIFCODE_UNDERLINE 5687 5688%DIF DIFCODE_CTRADITIONAL 5689 moredelim=[il][\color{red}\scriptsize]{\%DIF\ <\ }, 5690 moredelim=[il][\color{blue}\sffamily]{\%DIF\ >\ } 5691%DIF END DIFCODE_CTRADITIONAL 5692 5693%DIF DIFCODE_TRADITIONAL 5694 moredelim=[il][\color{white}\tiny]{\%DIF\ <\ }, 5695 moredelim=[il][\sffamily]{\%DIF\ >\ } 5696%DIF END DIFCODE_TRADITIONAL 5697 5698%DIF DIFCODE_CFONT 5699 moredelim=[il][\color{red}\scriptsize]{\%DIF\ <\ }, 5700 moredelim=[il][\color{blue}\sffamily]{\%DIF\ >\ } 5701%DIF END DIFCODE_CFONT 5702 5703%DIF DIFCODE_FONTSTRIKE 5704 moredelim=[il][\scriptsize \sout]{\%DIF\ <\ }, 5705 moredelim=[il][\sffamily]{\%DIF\ >\ } 5706%DIF END DIFCODE_FONTSTRIKE 5707 5708%DIF DIFCODE_INVISIBLE 5709 moredelim=[il][\color{white}\tiny]{\%DIF\ <\ }, 5710 moredelim=[il]{\%DIF\ >\ } 5711%DIF END 
DIFCODE_INVISIBLE 5712 5713%DIF DIFCODE_CHANGEBAR 5714 moredelim=[il][\color{white}\tiny]{\%DIF\ <\ }, 5715 moredelim=[il]{\%DIF\ >\ } 5716%DIF END DIFCODE_CHANGEBAR 5717 5718%DIF DIFCODE_CCHANGEBAR 5719 moredelim=[il][\color{red}]{\%DIF\ <\ }, 5720 moredelim=[il][\color{blue}]{\%DIF\ >\ } 5721%DIF END DIFCODE_CCHANGEBAR 5722 5723%DIF DIFCODE_CULINECHBAR 5724 moredelim=[il][\color{red}\sout]{\%DIF\ <\ }, 5725 moredelim=[il][\color{blue}\uwave]{\%DIF\ >\ } 5726%DIF END DIFCODE_CULINECHBAR 5727 5728%DIF DIFCODE_CFONTCHBAR 5729 moredelim=[il][\color{red}\scriptsize]{\%DIF\ <\ }, 5730 moredelim=[il][\color{blue}\sffamily]{\%DIF\ >\ } 5731%DIF END DIFCODE_CFONTCHBAR 5732 5733%DIF DIFCODE_BOLD 5734 % unfortunately \bfseries cannot be combined with ttfamily without extra packages 5735 % also morecomment=[il] is broken as of v1.5b of listings at least 5736 % workaround: plot in white with tiny font 5737 % morecomment=[il]{\%DIF\ <\ }, 5738 moredelim=[il][\color{white}\tiny]{\%DIF\ <\ }, 5739 moredelim=[il][\sffamily\bfseries]{\%DIF\ >\ } 5740%DIF END DIFCODE_BOLD 5741 5742%DIF DIFCODE_PDFCOMMENT 5743 5744 moredelim=[il][\color{white}\tiny]{\%DIF\ <\ }, 5745 moredelim=[il][\sffamily\bfseries]{\%DIF\ >\ } 5746%DIF END DIFCODE_PDFCOMMENT 5747 5748