1#!/usr/bin/env perl
2##!/usr/bin/perl -w
3# latexdiff - differences two latex files on the word level
4#             and produces a latex file with the differences marked up.
5#
6#   Copyright (C) 2004-20  F J Tilmann (tilmann@gfz-potsdam.de)
7#
8# Repository/issue tracker:   https://github.com/ftilmann/latexdiff
9# CTAN page:          http://www.ctan.org/pkg/latexdiff
10#
11#    This program is free software: you can redistribute it and/or modify
12#    it under the terms of the GNU General Public License as published by
13#    the Free Software Foundation, either version 3 of the License, or
14#    (at your option) any later version.
15#
16#    This program is distributed in the hope that it will be useful,
17#    but WITHOUT ANY WARRANTY; without even the implied warranty of
18#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19#    GNU General Public License for more details.
20#    You should have received a copy of the GNU General Public License
21#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
22#
23# Detailed usage information at the end of the file
24#
25
26# Version 1.3.1.1
27#  - remove spurious \n to fix error: Unknown regexp modifier "/n" at .../latexdiff line 1974, near "=~ " (see github issue #201)
28#
29# Version 1.3.1
30#    Bug fixes:
31#      - remove some uninitialised variable $2 warnings in string substitution in flatten function in case included file is not found
32#      - add minimal postprocessing to diff processing of preamble commands (replace \RIGHTBRACE by \} )
33#      - pre-processing: replace (contributed) routine take_comments_and_enter_from_frac() with take_comments_and_newline_from_frac(), which does the same thing
34#        (remove whitespace characters and comments between the argument of \frac commands) in an easier and more robust way. In addition, it
35#        will replace commands like \frac12 with \frac{1}{2} as pre-processing step.   Fixes issue #184
36#      - add "intertext" to list of unsafe math commands @UNSAFEMATHCMD . Fixes issue #179
37#      - provide citation command patterns for biblatex and protect them with mbox'es. Fixes issue #199
38#      - hardcode number of parameters for \href and \url commands to allow spaces between commands and arguments even if --allow-spaces option is not used (this
39#        is needed because some bibliography styles add such in-command-sequence spaces)  Fixes issues: #178 #198
40#      - bibitem is now kept even in deleted blocks such that deleted references show up properly (this implies that the actual numbers in numerical referencing schemes will change)
41#        (this is implemented by introducing a new class of commands KEEPCMD , which are kept as is in deleted environments (no effect in added environments). Currently
42#        \bibitem   is hardwired to be the only member of this class  (fixes issue #194, #174)
43#    Features:
44#      - add some special processing for revtex bibliography commands, so that the spaces between bibliography commands \bibfield and \bibinfo and their arguments are ignored.
45#         (fixes issue #194, should fix #174)
46#
47# Version 1.3.0 (7 October 2018):
#    - treat options to \documentclass as potential package names (some packages allow implicit loading of or imply selected packages)
49#    - improved pattern matching: now allows nested angular brackets, and is no longer confused by escaped curly braces
50#    - improved pattern matching in COARSE mode: occasionally, the closing bracket or some other elements would be matched in an 'unnatural' way due to another sequence being more minimal in the computational sense, sometimes even causing errors due to tokens moving in or out of the scope of math environments. This is now discouraged by adding internal \DIFANCHOR commands (which are removed again in post-processing) (fixes issues reported via email by li_ruomeng .
51#    - verbatim and lstlisting environments are marked-up with line-by-line in a similar style to non-verbatim text (requires the listing package to be installed)
52#       (see new configuration variable VERBATIMLINEENV) (several issues and pull requests by jprotze)
53#    - --flatten: now supports \verbatiminput and \lstlistinput
54#     - --flatten: if file is not found, do not fail, simply warn and leave command unexpanded (inspired by issue #112).  Don't warn if file name contains #[0-9] as it is then most likely an argument within a command definition rather than an actual file (applies to \input, \subfile, \include commands)
55#     - added to textcmds: \intertext
56#    - new config variable CUSTOMDIFCMD to allow defining special versions of  commands  in added or deleted blocks (Pull request by github user jprotze)
57#    - added option -no-links (mostly for use by latexdiff-vc in only-changes modes) (Pull request by github user jprotze)
58#    - new option --filter-script to run both input through a pre-processing script (PR jasonmccsmith  #167)
59#      new option --no-filter-stderr to hide stderr output from filter-script (potentially dangerous, as this might hide malfunctioning of filter scripts)
60#    - --flatten now can deal with imports made using the import package {PR jasonmccsmith #173)
61#   Bug fixes:
62#    - pattern matching of \verb and \lstinline commands had an error which meant they would trigger on commands beginning with \verb.
#    - In description environments, mark up item descriptions by effectively treating the insides of item commands as text commands (fixes #161)
64#
65#
66# Version 1.2.1 (22 June 2017)
#    - add "DeclareOldFontCommand" to styles using \bf or \sf old style font commands (fixes issue #92 )
68#    - improved markup: process lstinline commands in listings package correctly
69#      for styles using colour, \verb and \lstinline arguments are marked up with colour (blue for added, red for deleted)
70#    - bug fix: protecting inline math expressions for mbox did not work as intended (see stack exchange question: http://tex.stackexchange.com/questions/359412/compiling-the-latexdiff-when-adding-a-subscript-before-a-pmatrix-environment-cau)
71#    - bug fix: when deleted \item commands are followed immediately by unsafe commands, they were not restored properly
72#      (thanks to J. Protze for pull request) (pull request #89)
73#    - treat lstlisting and comment as equivalent to verbatim environment
74#      make environments that are treated like verbatim environments configurable (config variable VERBATIMENV)
#      treat lstinline as equivalent to verb command
76#      partially addresses issue #38
77#    - refactoring: set default configuration variables in a hash, and those that correspond to lists
78#    - feature: option --add-to-config used to amend configuration variables, which are regex pattern lists
79#    - bug fix: deleted figures when endfloat package is activated
80#    - bug fix: alignat environment now always processed correctly (fix issues #65)
81#    - bug fix: avoid processing of commands as potential files in routine init_regex_arr (fix issue #70 )
82#    - minimal feature enhancement: treat '@' as allowed character in commands (strictly speaking requires prior \makeatletter statement, but always assuming it to be
83#       @       a letter if it is part of a command name will usually lead to the correct behaviour (see http://tex.stackexchange.com/questions/346651/latexdiff-and-let)
84#    - new feature/bug fix: --flatten option \endinput in included files now respected but only if \endinput stands right at the beginning of the line (issue #77)
85#    - bug fix: flatten would incorrectly attempt to process commented out \include commands (from discussion in issue #77 )
86#    - introduce an invisible space (\hspace{0pt} after \mbox{..} auxiliary commands (not in math mode), to allow line breaks between added and deleted citations (change should not cause adverse behaviour otherwise)
87#
88# Version 1.2.0:
89#    - highlight new and deleted figures
90#    - bug fix in title mark-up. Previously deleted commands in title (such as \title, \author or \date) were marked up erroneously
91#    - (minor) bug fixes in new 1.1.1 features: disabled label was commented out twice, additional spaces were introduced before list environment begin and end commands
#    - deprecation fix: left brace in RegEx now needs to be escaped
93#    - add type PDFCOMMENT based on issue #49 submitted by github user peci1 (Martin Pecka)
94#    - make utf8 the default encoding
95#
96# Version 1.1.1
97#    - patch mhchem: allow ce in equations
98#    - flatten now also expands \input etc. in the preamble (but not \usepackage!)
99#    - Better support for Japanese ( contributed by github user kshramt )
100#    - prevent duplicated verbatim hashes (patch contributed by github user therussianjig, issue #36)
101#    - disable deleted label commands (fixes issue #31)
102#    - introduce post-processing to reinstate most deleted environments and all needed item commands (fixes issue #1)
103#
104# Version 1.1.0
105#    - treat diacritics (\",\', etc) as safe commands
106#    - treat \_ and \& correctly as safe commands, even if used without spacing to the next word
107#    - Add a BOLD markup type that sets added text in bold face (Contribution by Victor Zabalza via pull request )
108#    - add append-mboxsafecmd list option to be able to specify special safe commands which need to be surrounded by mbox to avoid breaking (mostly this is needed with ulem package)
109#    - support for siunitx and cleveref packages: protect \SI command in siunitx package and \cref,\Cref{range}{*} in cleveref packages (thanks to Stefan Pinnow for testing)
110#    - experimental support for chemformula, mhchem packages: define \ch and \ce in packages as safe (but not \ch,\cee in equation array environments) - these unfortunately will not be marked up (thanks to Stefan Pinnow for testing)
111#    - bug fix: packages identified correctly even if \usepackage command options extend over several lines (previously \usepackage command needed to be fully contained in one line)
112#    - new subtype ONLYCHANGEDPAGE outputs only changed pages (might not work well for floating material)
113#    - new subtype ZLABEL operates similarly to LABEL but uses absolute page numbers (needs zref package)
#    - undocumented option --debug/--nodebug to override default setting for debug mode (Default: 0 for release version, 1 for development version)
115#
116# Version 1.0.4
117#    - introduce list UNSAFEMATHCMD, which holds list of commands which cannot be marked up with \DIFadd or \DIFdel commands  (only relevant for WHOLE and COARSE math markup modes)
118#    - new subtype LABEL which gives each change a label. This can later be used to only display pages where changes
119#      have been made (instructions for that are put as comments into the diff'ed file) inspired by answer on http://tex.stackexchange.com/questions/166049/invisible-markers-in-pdfs-using-pdflatex
#    - Configuration variables take into account some commands from additional packages:
121#      tikzpicture environment now treated as PICTUREENV, and \smallmatrix in ARRENV (amsmath)
122#    - --flatten: support for \subfile command (subfiles package)  (in response to http://tex.stackexchange.com/questions/167620/latexdiff-with-subfiles )
123#    - --flatten: \bibliography commands expand if corresponding bbl file present
124#    - angled bracket optional commands now parsed correctly (patch #3570) submitted by Dave Kleinschmidt (thanks)
125#    - \RequirePackage now treated as synonym of \usepackage with respect to setting packages
126#    - special rules for apacite package (redefine citation commands)
127#    - recognise /dev/null as 'file-like' arguments for --preamble and --config options
128#    - fix units package incompatibility with ulem for text maths statements $ ..$ (thanks to Stuart Prescott for reporting this)
129#    - amsmath environment cases treated correctly (Bug fix #19029) (thanks to Jalar)
130#    - {,} in comments no longer confuse latexdiff (Bug fix #19146)
131#    - \% in one-letter sub/Superscripts was not converted correctly
132#
133# Version 1.0.3
134#    - fix bug in add_safe_commands that made latexdiff hang on DeclareMathOperator
135#      command in preamble
136#    - \(..\) inline math expressions were not parsed correctly, if they contained a linebreak
137#    - applied patch contributed by tomflannaghan via Berlios: [ Patch #3431 ] Adds correct handling of \left< and \right>
138#    - \$ is treated correctly as a literal dollar sign (thanks to Reed Cartwright and Joshua Miller for reporting this bug
139#      and sketching out the solution)
140#    - \^ and \_ are correctly interpreted as accent and underlined space, respectively, not as superscript of subscript
141#      (thanks to Wail Yahyaoui for pointing out this bug)
142#
143# Version 1.0.1 - treat \big,\bigg etc. equivalently to \left and
144#              \right - include starred version in MATHENV - apply
145#            - flatten recursively and --flatten expansion is now
146#              aware of comments (thanks to Tim Connors for patch)
147#            - Change to post-processing for more reliability for
148#              deleted math environments
149#            - On linux systems, recognise  and remove DOS style newlines
150#            - Provide markup for some special preamble commands (\title,
151#              \author,\date,
152#            - configurable by setting context2cmd
153#            - for styles using ulem package, remove \emph and \text.. from list of
154#              safe commands in order to allow linebreaks within the
155#              highlighted sections.
156#            - for ulem style, now show citations by enclosing them in \mbox commands.
157#              This unfortunately implies linebreaks within citations no longer function,
158#              so this functionality can be turned off (Option --disable-citation-markup).
159#              With --enable-citation-markup, the mbox markup is forced for other styles)
160#            - new substyle COLOR.  This is particularly useful for marking up citations
161#              and some special post-processing is implemented to retain cite
162#              commands in deleted blocks.
163#            - four different levels of math-markup
164#            - Option --driver for choosing driver for modes employing changebar package
165#            - accept \\* as valid command (and other commands of form \.*). Also accept
166#              \<nl> (backslashed newline)
167#            - some typo fixes, include commands defined in preamble as safe commands
168#              (Sebastian Gouezel)
169#            - include compared filenames as comments as line 2 and 3 of
170#              the preamble (can be modified with option --label, and suppressed with
171#              --no-label), option --visible-label to show files in generated pdf or dvi
172#              at the beginning of main document
173#
174# Version 0.5  A number of minor improvements based on feedback
175#              Deleted blocks are now shown before added blocks
176#              Package specific processing
177#
178# Version 0.43 unreleased typo in list of styles at the end
179#              Add protect to all \cbstart, \cbend commands
180#              More robust substitution of deleted math commands
181#
182# Version 0.42 November 06  Bug fixes only
183#
184# Version 0.4   March 06 option for fast differencing using UNIX diff command, several minor bug fixes (\par bug, improved highlighting of textcmds)
185#
186# Version 0.3   August 05 improved parsing of displayed math, --allow-spaces
187#               option, several minor bug fixes
188#
189# Version 0.25  October 04 Fix bug with deleted equations, add math mode commands to safecmd, add | to allowed interpunctuation signs
190# Version 0.2   September 04 extension to utf-8 and variable encodings
191# Version 0.1   August 04    First public release
192
# Inserted block for differencing
194# use Algorithm::Diff qw(traverse_sequences);
195# in standard version
196# The following BEGIN block contains a verbatim copy of
197# Ned Konz' Algorithm::Diff package version 1.15 except
# that subroutine _longestCommonSubsequence has been replaced by
199# a routine which internally uses the UNIX diff command for
200# the differencing rather than the Perl routines if the
201# length of the sequences exceeds some threshold.
202# Also, all POD documentation has been stripped out.
203#
204# (the distribution on which this modification is based is available
205#  from http://search.cpan.org/~nedkonz/Algorithm-Diff-1.15
206#  the most recent version can be found via  http://search.cpan.org/search?module=Algorithm::Diff )
207# Please note the LICENCE for Algorithm::Diff :
208#   "Copyright (c) 2000-2002 Ned Konz.  All rights reserved.
209#    This program is free software;
210#    you can redistribute it and/or modify it under the same terms
211#    as Perl itself."
212# The fast-differencing version of latexdiff is provided as a convenience
213# for latex users under Unix-like systems which have a 'diff' command.
214# If you believe
215# the inlining of Algorithm::Diff violates its license please contact
216# me and I will modify the latexdiff distribution accordingly.
217# Frederik Tilmann (tilmann@esc.cam.ac.uk)
218# Jonathan Paisley is acknowledged for the idea of using the system diff
219# command to achieve shorter running times
220BEGIN {
221package Algorithm::Diff;
222use strict;
223use vars qw($VERSION @EXPORT_OK @ISA @EXPORT);
224use integer;    # see below in _replaceNextLargerWith() for mod to make
225                # if you don't use this
226require Exporter;
227@ISA       = qw(Exporter);
228@EXPORT    = qw();
229@EXPORT_OK = qw(LCS diff traverse_sequences traverse_balanced sdiff);
230$VERSION = sprintf('%d.%02d fast', (q$Revision: 1.15 $ =~ /\d+/g));
231
232# Global parameters
233
234use File::Temp qw/tempfile/;
# if the largest index of the longer of the two sequences passed to
# _longestCommonSubsequence is smaller than this number, then use the
# internal algorithm, otherwise use UNIX diff
237use constant THRESHOLD => 100 ;
238# Detect whether diff --minimal option is available
239# if yes we use it
240use constant MINIMAL => ( system('diff','--minimal','/dev/null','/dev/null') >> 8 ==0 ? "--minimal" : "" ) ;
241
242
243
244# McIlroy-Hunt diff algorithm
245# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
246# by Ned Konz, perl@bike-nomad.com
247
248
# Build a lookup table from element key to the positions at which that key
# occurs in $aCollection, looking only at the indexes $start .. $end
# (both inclusive).
# Every value of the table is an array ref of indexes in DESCENDING order,
# because each newly seen index is put at the front of its list.
# The fourth parameter is a code ref that is called with the element (and
# any additional parameters given after it) and has to return the string
# that serves as hash key for that element.
# In list context the hash itself is returned, otherwise a reference to it.
#
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );

sub _withPositionsOfInInterval
{
	# take the four named arguments off @_; whatever remains in @_ is
	# handed through to the key generator
	my ( $aCollection, $start, $end, $keyGen ) = splice( @_, 0, 4 );

	my %positionsOf;
	for my $index ( $start .. $end )
	{
		my $element = $aCollection->[$index];
		my $key     = $keyGen->( $element, @_ );

		# autovivification creates the list the first time a key is seen
		unshift @{ $positionsOf{$key} }, $index;
	}
	return wantarray ? %positionsOf : \%positionsOf;
}
281
# Insert $aValue into the ascending numeric array @$array at the position
# where it belongs:
#   - if $aValue is larger than the last element (or the array is empty),
#     it is appended;
#   - if $aValue is already present, nothing is changed and undef is
#     returned;
#   - otherwise the next larger element is overwritten with $aValue.
# The optional third argument $high is the upper index bound for the binary
# search; a false value (undef or 0) means "use the last index of the array".
# Returns the index at which $aValue was stored (for the append case this is
# computed as $high + 1), or undef if the value was already there.
# The midpoint computation relies on the 'use integer' pragma in effect for
# this package.
# This is where the bulk of the time is spent in this module, so it is kept
# deliberately simple.

sub _replaceNextLargerWith
{
	my ( $array, $aValue, $high ) = @_;
	$high = $#$array unless $high;

	# nothing in the array yet, or value lies beyond the current maximum
	if ( $high == -1 || $aValue > $array->[-1] )
	{
		push @$array, $aValue;
		return $high + 1;
	}

	# binary search between $low and $high for the insertion point
	my $low = 0;
	while ( $low <= $high )
	{
		my $middle = ( $high + $low ) / 2;    # integer division (use integer)
		my $probe  = $array->[$middle];

		# value already present: leave the array untouched
		return undef if $aValue == $probe;

		if ( $aValue > $probe )
		{
			$low = $middle + 1;
		}
		else
		{
			$high = $middle - 1;
		}
	}

	# $low now indexes the next larger element; overwrite it
	$array->[$low] = $aValue;
	return $low;
}
331
# Compute the longest common subsequence of the sequences $a and $b.
#
# Arguments:
#   $a, $b    array refs holding the two sequences
#   $keyGen   optional code ref; it is called with an element (followed by
#             any additional parameters) and has to return a string that
#             identifies the element.  Elements with the same key compare
#             as equal.  If this parameter is undef or missing, the element
#             itself (as a string) is the key and comparison is done
#             with "eq".
#   any additional parameters are passed on to the key generation routine.
#
# Result is an array (list context) or array ref (otherwise), whose
# contents is such that
# 	$a->[ $i ] == $b->[ $result[ $i ] ]
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
#
# Two strategies are used:
#   - if the larger of the two last indexes is below THRESHOLD, the
#     sequences are compared with the internal (pure Perl) algorithm;
#   - otherwise the keys are written, one per line, into two temporary
#     files in the current directory and the external 'diff' command is run
#     on them; its output (lines of the form NcM, NaM, NdM) is translated
#     into the match vector.
#
# Dies if the temporary files cannot be written, if diff cannot be started,
# if diff reports failure (exit status greater than 1, or a system error
# while closing the pipe), or if its output is inconsistent with the
# sequence lengths.

sub _longestCommonSubsequence
{
	my $a      = shift;    # array ref
	my $b      = shift;    # array ref
	my $keyGen = shift;    # code ref
	my $compare;           # code ref

	# set up code refs
	# Note that these are optimized.
	if ( !defined($keyGen) )    # optimize for strings
	{
		$keyGen = sub { $_[0] };
		$compare = sub { my ( $a, $b ) = @_; $a eq $b };
	}
	else
	{
		$compare = sub {
			my $a = shift;
			my $b = shift;
			&$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
		};
	}

	my ( $aStart, $aFinish, $bStart, $bFinish, $matchVector ) =
	  ( 0, $#$a, 0, $#$b, [] );

	# Check whether to use internal routine (small number of elements)
	# or use it as a wrapper for UNIX diff
	if ( ( $#$a > $#$b ?  $#$a : $#$b) < THRESHOLD ) {
		# First we prune off any common elements at the beginning
		while ( $aStart <= $aFinish
			and $bStart <= $bFinish
			and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
		{
			$matchVector->[ $aStart++ ] = $bStart++;
		}

		# now the end
		while ( $aStart <= $aFinish
			and $bStart <= $bFinish
			and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
		{
			$matchVector->[ $aFinish-- ] = $bFinish--;
		}

		# Now compute the equivalence classes of positions of elements
		my $bMatches =
		  _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
		my $thresh = [];
		my $links  = [];

		my ( $i, $ai, $j, $k );
		for ( $i = $aStart ; $i <= $aFinish ; $i++ )
		{
			$ai = &$keyGen( $a->[$i], @_ );
			if ( exists( $bMatches->{$ai} ) )
			{
				$k = 0;
				for $j ( @{ $bMatches->{$ai} } )
				{

					# optimization: most of the time this will be true
					if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
					{
						$thresh->[$k] = $j;
					}
					else
					{
						$k = _replaceNextLargerWith( $thresh, $j, $k );
					}

					# oddly, it's faster to always test this (CPU cache?).
					if ( defined($k) )
					{
						$links->[$k] =
						  [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
					}
				}
			}
		}

		# follow the chain of links back from the longest subsequence found
		if (@$thresh)
		{
			for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
			{
				$matchVector->[ $link->[1] ] = $link->[2];
			}
		}
	}
	else {
		# large number of elements, use system diff
		my ($fha,$fhb,$fna,$fnb);
		my ($alines,$blines,$alb,$alf,$blb,$blf);
		my ($minimal)=MINIMAL;

		# UNLINK => 1 lets File::Temp remove the files at program exit, so
		# they do not linger if one of the die calls below is triggered
		($fha,$fna)=tempfile("DiffA-XXXX", UNLINK => 1) or die "_longestCommonSubsequence: Cannot open tempfile for sequence A";
		($fhb,$fnb)=tempfile("DiffB-XXXX", UNLINK => 1) or die "_longestCommonSubsequence: Cannot open tempfile for sequence B";

		# prepare sequence A, then sequence B: one key per line, with
		# backslashes and newlines escaped so that each key stays on one line
		foreach my $out ( [ $fha, $fna, $a ], [ $fhb, $fnb, $b ] ) {
			my ( $fh, $fn, $sequence ) = @$out;
			foreach my $ele ( @$sequence ) {
				my $key=&$keyGen( $ele, @_ );
				$key =~ s/\\/\\\\/g ;
				$key =~ s/\n/\\n/sg ;
				print $fh "$key\n" ;
			}
			# buffered write errors only show up at close; a truncated file
			# would silently produce a wrong comparison
			close($fh) or die "_longestCommonSubsequence: Cannot write tempfile $fn: $!";
		}

		# lexical handle; the single command string is run exactly as with
		# the two-argument pipe open
		open(my $diffpipe, '-|', "diff $minimal $fna $fnb") or die "_longestCommonSubsequence: Cannot launch diff process. $!" ;
		# The diff line numbering begins with 1, but Perl subscripts start with 0
		# We follow the diff numbering but subtract 1 when assigning to matchVector
		$aStart++; $bStart++ ; $aFinish++ ; $bFinish++ ;
		# read into a lexical so that the caller's $_ is not clobbered
		while( my $line = <$diffpipe> ) {
			if ( ($alines,$blines) = ( $line =~ m/^(\d*(?:,\d*)?)?c(\d*(?:,\d*)?)?$/ ) ) {
				# changed block: elements before it match one-to-one
				($alb,$alf)=split(/,/,$alines);
				($blb,$blf)=split(/,/,$blines);
				$alf=$alb unless defined($alf);
				$blf=$blb unless defined($blf);
				while($aStart < $alb ) {
					$matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
				}
				# check for consistency
				$bStart==$blb or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in changed sequence";
				$aStart=$alf+1;
				$bStart=$blf+1;
			}
			elsif ( ($alb,$blines) = ( $line =~ m/^(\d*)a(\d*(?:,\d*)?)$/ ) ) {
				# elements appended in B
				($blb,$blf)=split(/,/,$blines);
				$blf=$blb unless defined($blf);
				while ( $bStart < $blb ) {
					$matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
				}
				$aStart==$alb+1 or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in appended sequence near elements $aStart and $bStart";
				$bStart=$blf+1;
			}
			elsif ( ($alines,$blb) = ( $line =~ m/^(\d*(?:,\d*)?)d(\d*)$/ ) ) {
				# elements deleted from A
				($alb,$alf)=split(/,/,$alines);
				$alf=$alb unless defined($alf);
				while ( $aStart < $alb ) {
					$matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
				}
				$bStart==$blb+1 or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in deleted sequence near elements $aStart and $bStart";
				$aStart=$alf+1;
			}
			elsif ( $line =~ m/^Binary files/ ) {
				# if diff reports it is a binary file force --text mode. I do not like
				# to always use this option because it is probably only available in GNU diff
				# (re-opening the handle closes the first pipe; reading continues
				# from the new diff process)
				open($diffpipe, '-|', "diff --text $fna $fnb") or die "Cannot launch diff process. $!" ;
			}
			# Default: just skip line
		}

		# Closing the pipe waits for diff and sets $?.  close returns false
		# both for a system error ($! set) and for a non-zero exit status
		# ($! is 0 then), so $! is only meaningful if close failed.
		# diff exits with 0 (no differences) or 1 (differences found);
		# anything larger means that diff itself ran into trouble.
		my $closed     = close($diffpipe);
		my $exitstatus = $? >> 8;
		if ( ( !$closed && $! ) || $exitstatus > 1 ) {
			print STDERR "diff process failed with exit code ", $exitstatus, " $!\n";
			die;
		}

		# everything after the last reported difference matches one-to-one
		while ($aStart <= $aFinish ) {
			$matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
		}
		$bStart==$bFinish+1  or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency at end";
		unlink $fna,$fnb ;
	}
	return wantarray ? @$matchVector : $matchVector;
}
522
# Walk through the sequences $a and $b (array refs) in parallel and report
# what happens to each element through the callbacks in the hash ref given
# as third argument:
#   MATCH      ( $ai, $bi )  element $ai of A matches element $bi of B
#   DISCARD_A  ( $ai, $bi )  element $ai of A has no counterpart in B
#   DISCARD_B  ( $ai, $bi )  element $bi of B has no counterpart in A
#   A_FINISHED ( $lastA )    called (once) when A is exhausted but B is not
#   B_FINISHED ( $lastB )    called (once) when B is exhausted but A is not
# Missing MATCH/DISCARD_A/DISCARD_B callbacks default to no-ops.
# The fourth argument is an optional key generating code ref; it and any
# further arguments are handed to _longestCommonSubsequence, and the further
# arguments are also appended to every callback invocation.
# Always returns 1.
sub traverse_sequences
{
	my $a         = shift;          # array ref
	my $b         = shift;          # array ref
	my $callbacks = shift || {};
	my $keyGen    = shift;

	my $onMatch     = $callbacks->{'MATCH'}     || sub { };
	my $onDiscardA  = $callbacks->{'DISCARD_A'} || sub { };
	my $onFinishedA = $callbacks->{'A_FINISHED'};
	my $onDiscardB  = $callbacks->{'DISCARD_B'} || sub { };
	my $onFinishedB = $callbacks->{'B_FINISHED'};

	my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );

	my ( $lastA, $lastB ) = ( $#$a, $#$b );
	my ( $ai,    $bi )    = ( 0,    0 );

	# Process all the lines in @$matchVector
	while ( $ai <= $#$matchVector )
	{
		my $bLine = $matchVector->[$ai];
		if ( !defined($bLine) )
		{
			# no partner in B for this element of A
			$onDiscardA->( $ai, $bi, @_ );
		}
		else
		{
			# report the elements of B skipped over, then the match itself
			$onDiscardB->( $ai, $bi++, @_ ) while $bi < $bLine;
			$onMatch->( $ai, $bi++, @_ );
		}
		$ai++;
	}

	# The last entry (if any) processed was a match.
	# $ai and $bi point just past the last matching lines in their sequences.
	while ( $ai <= $lastA or $bi <= $lastB )
	{

		# A used up, B still has elements
		if ( $ai == $lastA + 1 and $bi <= $lastB )
		{
			if ( defined($onFinishedA) )
			{
				$onFinishedA->( $lastA, @_ );
				$onFinishedA = undef;    # report only once
			}
			else
			{
				$onDiscardB->( $ai, $bi++, @_ ) while $bi <= $lastB;
			}
		}

		# B used up, A still has elements
		if ( $bi == $lastB + 1 and $ai <= $lastA )
		{
			if ( defined($onFinishedB) )
			{
				$onFinishedB->( $lastB, @_ );
				$onFinishedB = undef;    # report only once
			}
			else
			{
				$onDiscardA->( $ai++, $bi, @_ ) while $ai <= $lastA;
			}
		}

		$onDiscardA->( $ai++, $bi, @_ ) if $ai <= $lastA;
		$onDiscardB->( $ai, $bi++, @_ ) if $bi <= $lastB;
	}

	return 1;
}
596
# Like traverse_sequences, but between two matches elements of A and B are
# paired up and reported as changes as long as both sequences have unmatched
# elements left; only the surplus on one side is reported as discarded.
# Callbacks (hash ref, third argument), each called with ( $ai, $bi ) plus
# any additional arguments given after $keyGen:
#   MATCH, DISCARD_A, DISCARD_B   default to no-ops if missing
#   CHANGE                        if missing, a change is reported as a
#                                 DISCARD_A followed by a DISCARD_B
# Always returns 1.
sub traverse_balanced
{
	my $a         = shift;          # array ref
	my $b         = shift;          # array ref
	my $callbacks = shift || {};
	my $keyGen    = shift;

	my $onMatch    = $callbacks->{'MATCH'}     || sub { };
	my $onDiscardA = $callbacks->{'DISCARD_A'} || sub { };
	my $onDiscardB = $callbacks->{'DISCARD_B'} || sub { };
	my $onChange   = $callbacks->{'CHANGE'};

	my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );

	my ( $lastA, $lastB ) = ( $#$a, $#$b );
	my ( $ai,    $bi )    = ( 0,    0 );
	my $ma = -1;    # index in A of the match currently worked towards
	my $mb;         # index in B of that match

	NEXT_MATCH:
	while (1)
	{

		# Find next match indices $ma and $mb
		$ma++;
		$ma++ while ( $ma <= $#$matchVector && !defined $matchVector->[$ma] );

		last NEXT_MATCH if $ma > $#$matchVector;    # end of matchVector?
		$mb = $matchVector->[$ma];

		# Proceed with discard a/b or change events until
		# next match
		while ( $ai < $ma || $bi < $mb )
		{
			if ( $ai >= $ma )
			{
				# only B is behind
				$onDiscardB->( $ai, $bi++, @_ );
			}
			elsif ( $bi >= $mb )
			{
				# only A is behind
				$onDiscardA->( $ai++, $bi, @_ );
			}
			elsif ( defined $onChange )
			{
				# both behind: change
				$onChange->( $ai++, $bi++, @_ );
			}
			else
			{
				# both behind, no change handler
				$onDiscardA->( $ai++, $bi, @_ );
				$onDiscardB->( $ai, $bi++, @_ );
			}
		}

		# Match
		$onMatch->( $ai++, $bi++, @_ );
	}

	# whatever follows the last match
	while ( $ai <= $lastA || $bi <= $lastB )
	{
		if ( $ai > $lastA )
		{
			# only B has elements left
			$onDiscardB->( $ai, $bi++, @_ );
		}
		elsif ( $bi > $lastB )
		{
			# only A has elements left
			$onDiscardA->( $ai++, $bi, @_ );
		}
		elsif ( defined $onChange )
		{
			$onChange->( $ai++, $bi++, @_ );
		}
		else
		{
			$onDiscardA->( $ai++, $bi, @_ );
			$onDiscardB->( $ai, $bi++, @_ );
		}
	}

	return 1;
}
691
sub LCS
{
	# Return the elements of the first sequence that take part in the longest
	# common subsequence with the second one (a list in list context, an
	# array reference otherwise).  Remaining arguments are passed through to
	# _longestCommonSubsequence.
	my $seq         = shift;                                   # array ref
	my $matchVector = _longestCommonSubsequence( $seq, @_ );

	# keep every element whose index has a defined partner in the match vector
	my @common =
	  map { $seq->[$_] }
	  grep { defined( $matchVector->[$_] ) } 0 .. $#$matchVector;

	return wantarray ? @common : \@common;
}
707
sub diff
{
	# Compute the differences between two sequences as a list of hunks.
	# Every hunk is an array ref of [ '-', index in A, element of A ] and
	# [ '+', index in B, element of B ] entries collected between two matches.
	# Returns the list of hunks in list context, an array ref otherwise.
	my $seqA    = shift;    # array ref
	my $seqB    = shift;    # array ref
	my $hunks   = [];
	my $pending = [];       # hunk currently being assembled

	# close the pending hunk (if it holds anything) and start a new one
	my $flush = sub {
		push( @$hunks, $pending ) if scalar(@$pending);
		$pending = [];
	};

	my %callbacks = (
		MATCH     => $flush,
		DISCARD_A => sub { push( @$pending, [ '-', $_[0], $seqA->[ $_[0] ] ] ) },
		DISCARD_B => sub { push( @$pending, [ '+', $_[1], $seqB->[ $_[1] ] ] ) },
	);

	traverse_sequences( $seqA, $seqB, \%callbacks, @_ );

	# the last hunk is not followed by a match, so flush it explicitly
	$flush->();
	return wantarray ? @$hunks : $hunks;
}
722
sub sdiff
{
	# Side-by-side difference of two sequences.  The result holds one
	# [ flag, element of A, element of B ] triple per step, where flag is
	#   'u' unchanged, 'c' changed, '-' only in A (B side is ""),
	#   '+' only in B (A side is "").
	# Returns the list of triples in list context, an array ref otherwise.
	my $seqA = shift;    # array ref
	my $seqB = shift;    # array ref
	my $rows = [];

	my %callbacks = (
		MATCH     => sub { push( @$rows, [ 'u', $seqA->[ $_[0] ], $seqB->[ $_[1] ] ] ) },
		DISCARD_A => sub { push( @$rows, [ '-', $seqA->[ $_[0] ], "" ] ) },
		DISCARD_B => sub { push( @$rows, [ '+', "", $seqB->[ $_[1] ] ] ) },
		CHANGE    => sub { push( @$rows, [ 'c', $seqA->[ $_[0] ], $seqB->[ $_[1] ] ] ) },
	);

	traverse_balanced( $seqA, $seqB, \%callbacks, @_ );
	return wantarray ? @$rows : $rows;
}
749
7501;
751}
752import Algorithm::Diff qw(traverse_sequences);
753# End of inserted block for stand-alone version
754
755
756use Getopt::Long ;
757use strict ;
758use warnings;
759use utf8 ;
760
761use File::Spec ;
762
# First blank-separated word of the Algorithm::Diff version string
my $algodiffversion=(split(/ /,$Algorithm::Diff::VERSION))[0];


# Version banner (shown by --version and, in verbose mode, at start-up)
my $versionstring="This is LATEXDIFF 1.3.1.1 (Algorithm::Diff $Algorithm::Diff::VERSION, Perl $^V)\n"
                 ."  (c) 2004-2020 F J Tilmann\n";
770
# Hash with defaults for configuration variables. Those marked undef have default values constructed from the list defined in the DATA block
# (under tag CONFIG).
# Keys of this hash are the only names accepted in variable=value assignments given with --config / --add-to-config.
my %CONFIG=(
   MINWORDSBLOCK => 3, # minimum number of tokens to form an independent block
                        # shorter identical blocks will be merged to the previous word
   SCALEDELGRAPHICS => 0.5, # factor with which deleted figures will be scaled down (i.e. 0.5 implies they are shown at half linear size)
                             # this is only used for --graphics-markup=BOTH option
   FLOATENV => undef ,   # Environments in which FL variants of defined commands are used
   PICTUREENV => undef ,   # Environments in which all change markup is removed
   MATHENV => undef ,           # Environments turning on display math mode (code also knows about \[ and \])
   MATHREPL => 'displaymath',  # Environment introducing deleted maths blocks
   MATHARRENV => undef ,           # Environments turning on eqnarray math mode
   MATHARRREPL => 'eqnarray*',  # Environment introducing deleted maths blocks
   ARRENV => undef , # Environments making arrays in math mode.  The underlining style does not cope well with those - as a result in-text math environments are surrounded by \mbox{ } if any of these commands is used in an inline math block
   COUNTERCMD => undef,
                                        # COUNTERCMD textcmds which are associated with a counter
                                        # If any of these commands occur in a deleted block
                                        # they will be followed by an \addtocounter{...}{-1}
                                        # for the associated counter such that the overall numbers
                                        # should be the same as in the new file
   LISTENV => undef ,  # list making environments - they will generally be kept
   VERBATIMENV => undef,      # Environments whose content should be treated as verbatim text and not be touched
   VERBATIMLINEENV => undef,  # Environments whose content should be treated as verbatim text and processed in line diff mode
   CUSTOMDIFCMD => undef,# Custom dif command. Is defined in the document as a \DELcommand and \ADDcommand version to be replaced by the diff
   ITEMCMD => 'item'                    # command marking item in a list environment
);
# Configuration variables: these have to be visible from the subroutines.
# There is one variable per key of %CONFIG; they are only declared here and
# receive their values once the configuration options have been processed.
my ($ARRENV,
    $COUNTERCMD,
    $FLOATENV,
    $ITEMCMD,
    $LISTENV,
    $MATHARRENV,
    $MATHARRREPL,
    $MATHENV,
    $MATHREPL,
    $MINWORDSBLOCK,
    $PICTUREENV,
    $SCALEDELGRAPHICS,
    $VERBATIMENV,
    $VERBATIMLINEENV,
    $CUSTOMDIFCMD
    );
814
815# my $MINWORDSBLOCK=3; # minimum number of tokens to form an independent block
816#                      # shorter identical blocks will be merged to the previous word
817# my $SCALEDELGRAPHICS=0.5; # factor with which deleted figures will be scaled down (i.e. 0.5 implies they are shown at half linear size)
818#                       # this is only used for --graphics-markup=BOTH option
819# my $FLOATENV='(?:figure|table|plate)[\w\d*@]*' ;   # Environments in which FL variants of defined commands are used
820# my $PICTUREENV='(?:picture|tikzpicture|DIFnomarkup)[\w\d*@]*' ;   # Environments in which all change markup is removed
821# my $MATHENV='(?:equation[*]?|displaymath|DOLLARDOLLAR)[*]?' ;           # Environments turning on display math mode (code also knows about \[ and \])
822# my $MATHREPL='displaymath';  # Environment introducing deleted maths blocks
823# my $MATHARRENV='(?:eqnarray|align|alignat|gather|multline|flalign)[*]?' ;           # Environments turning on eqnarray math mode
824# my $MATHARRREPL='eqnarray*';  # Environment introducing deleted maths blocks
825# my $ARRENV='(?:aligned|gathered|array|[pbvBV]?matrix|smallmatrix|cases|split)'; # Environments making arrays in math mode.  The underlining style does not cope well with those - as a result in-text math environments are surrounded by \mbox{ } if any of these commands is used in an inline math block
826# my $COUNTERCMD='(?:footnote|part|chapter|section|subsection|subsubsection|paragraph|subparagraph)';  # textcmds which are associated with a counter
827#                                         # If any of these commands occur in a deleted block
828#                                         # they will be succeeded by an \addtocounter{...}{-1}
829#                                         # for the associated counter such that the overall numbers
830#                                         # should be the same as in the new file
831# my $LISTENV='(?:itemize|description|enumerate)'; # list making environments - they will generally be kept
832# my $ITEMCMD='item';   # command marking item in a list environment
833
834
835
my $LABELCMD='(?:label)';                # matching commands are disabled within deleted blocks - mostly useful for maths mode, as otherwise it would be fine to just not add those to SAFECMDLIST
my @UNSAFEMATHCMD=('qedhere','intertext');           # Commands which are definitely unsafe for marking up in math mode (amsmath qedhere only tested to not work with UNDERLINE markup) (only affects WHOLE and COARSE math markup modes). Note that unlike text mode (or FINE math mode) deleted unsafe commands are not deleted but simply taken outside \DIFdel
my $MBOXINLINEMATH=0; # if set to 1 then surround marked-up inline maths expression with \mbox (to get around compatibility
                      # problems between some maths packages and ulem package)


# Markup strings
# If at all possible, do not change these as parts of the program
# depend on the actual name (particularly post-processing)
# At the very least adapt subroutine postprocess to new tokens.
my $ADDMARKOPEN='\DIFaddbegin ';   # Token to mark begin of appended text
my $ADDMARKCLOSE='\DIFaddend ';   # Token to mark end of appended text
my $ADDOPEN='\DIFadd{';  # To mark begin of added text passage
my $ADDCLOSE='}';        # To mark end of added text passage
my $ADDCOMMENT='DIF > ';   # To mark added comment line
my $DELMARKOPEN='\DIFdelbegin ';   # Token to mark begin of deleted text
my $DELMARKCLOSE='\DIFdelend ';   # Token to mark end of deleted text
my $DELOPEN='\DIFdel{';  # To mark begin of deleted text passage
my $DELCLOSE='}';        # To mark end of deleted text passage
my $DELCMDOPEN='%DIFDELCMD < ';  # To mark begin of deleted commands (must begin with %, i.e., be a comment)
my $DELCMDCLOSE="%%%\n";    # To mark end of deleted commands (must end with a new line)
my $AUXCMD='%DIFAUXCMD' ; #  follows auxiliary commands put in by latexdiff to make difference file legal
                          # auxiliary commands must be on a line of their own
                          # Note that for verbatim environment openings the %DIFAUXCMD cannot be placed in
                          # the same line as this would mean they are shown
                          # so the special form "%DIFAUXCMD NEXT" is used to indicate that the next line
                          # is an auxiliary command
                          # Similarly "%DIFAUXCMD LAST" would indicate the auxiliary command is in previous line (not currently used)
my $DELCOMMENT='DIF < ';   # To mark deleted comment line
my $VERBCOMMENT='DIFVRB ';  # to mark lines which are within a verbatim environment
866
# main local variables:
# Each ...LIST array holds patterns selecting commands of one category; the
# matching ...EXCL array holds patterns that take commands out of that category again.
my @TEXTCMDLIST=();  # array containing patterns of commands with text arguments
my @TEXTCMDEXCL=();  # array containing patterns of commands without text arguments (if a pattern
                     # matches both TEXTCMDLIST and TEXTCMDEXCL it is excluded)
my @CONTEXT1CMDLIST=();  # array containing patterns of commands with text arguments (subset of text commands),
                         # but which cause confusion if used out of context (e.g. \caption).
                         # In deleted passages, the command will be disabled but its argument is marked up
                         # Otherwise they behave exactly like TEXTCMD's
my @CONTEXT1CMDEXCL=();  # exclude list for above, but always empty
my @CONTEXT2CMDLIST=();  # array containing patterns of commands with text arguments, but which fail or cause confusion
                         # if used out of context (e.g. \title). They and their arguments will be disabled in deleted
                         # passages
my @CONTEXT2CMDEXCL=();  # exclude list for above, but always empty
my @MATHTEXTCMDLIST=();  # treat like textcmd.  If a textcmd is in deleted or added block, just wrap the
                         # whole content with \DIFadd or \DIFdel irrespective of content.  This functionality
                         # is useful for pseudo commands \MATHBLOCK.. into which math environments are being
                         # transformed
my @MATHTEXTCMDEXCL=();  # exclude list for above

# Note I need to declare this with "our" instead of "my" because later in the code I have to "local"ise these
our @SAFECMDLIST=();  # array containing patterns of safe commands (which do not break when in the argument of DIFadd or DIFDEL)
our @SAFECMDEXCL=();  # exclude list for above
my @MBOXCMDLIST=();   # patterns for commands which are in principle safe but which need to be surrounded by an \mbox
my @MBOXCMDEXCL=();           # all the patterns in MBOXCMDLIST will be appended to SAFECMDLIST

my @KEEPCMDLIST=( qr/^bibitem$/ );   # patterns for commands which should not be deleted in nominally delete text passages
my @KEEPCMDEXCL=();  # exclude list for above
894
# Working variables of the main program
my ($i,$j,$l);
my ($old,$new);                     # complete text of the old and the new file
my ($line,$key);
my (@dumlist);
my ($newpreamble,$oldpreamble);     # text before \begin{document}
my (@newpreamble,@oldpreamble,@diffpreamble,@diffbody);
my ($latexdiffpreamble);            # extra preamble commands inserted by latexdiff
my ($oldbody, $newbody, $diffbo);   # NOTE(review): $diffbo looks like a truncated name; no use is visible in this part of the file
my ($oldpost, $newpost);            # text after \end{document}
my ($diffall);
# Variables receiving the command line options
my ($type,$subtype,$floattype,$config,$preamblefile,$encoding,$nolabel,$visiblelabel,
    $filterscript,$ignorefilterstderr,
    $showpreamble,$showsafe,$showtext,$showconfig,$showall,
    $replacesafe,$appendsafe,$excludesafe,
    $replacetext,$appendtext,$excludetext,
    $replacecontext1,$appendcontext1,
    $replacecontext2,$appendcontext2,
    $help,$verbose,$driver,$version,$ignorewarnings,
    $enablecitmark,$disablecitmark,$allowspaces,$flatten,$nolinks,$debug,$earlylatexdiffpreamble);  ###$disablemathmark,
my ($mboxsafe);
# Mnemonics for the values of $mathmarkup (option --math-markup)
my $mathmarkup;
use constant {
  OFF => 0,
  WHOLE => 1,
  COARSE => 2,
  FINE => 3
};
# Mnemonics for the values of $graphicsmarkup (option --graphics-markup)
my $graphicsmarkup;
use constant {
  NONE => 0,
  NEWONLY => 1,
  BOTH => 2
};

my ($mboxcmd);

# Lists collecting options which may be given more than once
my (@configlist,@addtoconfiglist,@labels,
    @appendsafelist,@excludesafelist,
    @appendmboxsafelist,@excludemboxsafelist,
    @appendtextlist,@excludetextlist,
    @appendcontext1list,@appendcontext2list,
    @packagelist);
my ($assign,@config);
# Hash where keys correspond to the names of all included packages (including the documentclass as another package);
# the optional arguments to the package are the values of the hash elements
my ($pkg,%packages);

# Defaults
$mathmarkup=COARSE;
$verbose=0;
# output debug and intermediate files, set to 0 in final distribution
$debug=0;
# insert preamble directly after documentclass - experimental feature, set to 0 in final distribution
# Note that this failed with mini example (or other files, where packages used in latexdiff preamble
# are called again with incompatible options in preamble of resulting file)
$earlylatexdiffpreamble=0;
954
955
956# define character properties
sub IsNonAsciiPunct {
  # User-defined character property (used as \p{IsNonAsciiPunct}):
  # Unicode punctuation but excluding ASCII punctuation.
  # The returned text holds one set operation per line: add all punctuation, remove all ASCII.
  my @setops = ('+utf8::IsPunct', '-utf8::IsASCII');
  return join('', map { "$_\n" } @setops);
}
sub IsNonAsciiS {
  # User-defined character property (used as \p{IsNonAsciiS}):
  # Unicode symbol but excluding ASCII.
  # The returned text holds one set operation per line: add all symbols, remove all ASCII.
  my @setops = ('+utf8::IsS', '-utf8::IsASCII');
  return join('', map { "$_\n" } @setops);
}
967
968
my %verbhash;

# Command line parsing.
# 'bundling' lets single-letter options be combined behind one dash.
# Options bound to an array reference may be given several times; each
# occurrence adds one element.  '=s' marks options requiring a string value,
# '!' (debug) marks an option that can be negated as --no-debug.
# Parsing errors end the program with a hint to the help option.
Getopt::Long::Configure('bundling');
GetOptions('type|t=s' => \$type,
	   'subtype|s=s' => \$subtype,
	   'floattype|f=s' => \$floattype,
	   'config|c=s' => \@configlist,
	   'add-to-config=s' => \@addtoconfiglist,
	   'preamble|p=s' => \$preamblefile,
	   'encoding|e=s' => \$encoding,
	   'label|L=s' => \@labels,
	   'no-label' => \$nolabel,
	   'visible-label' => \$visiblelabel,
	   'exclude-safecmd|A=s' => \@excludesafelist,
	   'replace-safecmd=s' => \$replacesafe,
	   'append-safecmd|a=s' => \@appendsafelist,
	   'exclude-textcmd|X=s' => \@excludetextlist,
	   'replace-textcmd=s' => \$replacetext,
	   'append-textcmd|x=s' => \@appendtextlist,
	   'replace-context1cmd=s' => \$replacecontext1,
	   'append-context1cmd=s' => \@appendcontext1list,
	   'replace-context2cmd=s' => \$replacecontext2,
	   'append-context2cmd=s' => \@appendcontext2list,
	   'exclude-mboxsafecmd=s' => \@excludemboxsafelist,
	   'append-mboxsafecmd=s' => \@appendmboxsafelist,
	   'show-preamble' => \$showpreamble,
	   'show-safecmd' => \$showsafe,
	   'show-textcmd' => \$showtext,
	   'show-config' => \$showconfig,
	   'show-all' => \$showall,
           'packages=s' => \@packagelist,
	   'allow-spaces' => \$allowspaces,
           'math-markup=s' => \$mathmarkup,
           'graphics-markup=s' => \$graphicsmarkup,
           'enable-citation-markup|enforce-auto-mbox' => \$enablecitmark,
           'disable-citation-markup|disable-auto-mbox' => \$disablecitmark,
	   'verbose|V' => \$verbose,
	   'ignore-warnings' => \$ignorewarnings,
	   'driver=s'=> \$driver,
	   'flatten' => \$flatten,
	   'filter-script=s' => \$filterscript,
       'ignore-filter-stderr' => \$ignorefilterstderr,
	   'no-links' => \$nolinks,
	   'version' => \$version,
	   'help|h' => \$help,
	   'debug!' => \$debug ) or die "Use latexdiff -h to get help.\n" ;
1015
# --help: show the usage information (usage() presumably does not return - TODO confirm)
if ( $help ) {
  usage() ;
}


# --version: print the version banner and stop
if ( $version ) {
  die $versionstring ;
}

print STDERR $versionstring if $verbose;

# --show-all is shorthand for all individual --show-... options
if (defined($showall)){
  $showpreamble=$showsafe=$showtext=$showconfig=1;
}
# Default types
$type='UNDERLINE' unless defined($type);
$subtype='SAFE' unless defined($subtype);
# set floattype to IDENTICAL for LABEL and ONLYCHANGEDPAGE subtype, unless it has been set explicitly on the command line
$floattype=($subtype eq 'LABEL' || $subtype eq 'ONLYCHANGEDPAGE') ? 'IDENTICAL' : 'FLOATSAFE' unless defined($floattype);
if ( $subtype eq 'LABEL' ) {
  # terminate the note with a newline so that later messages on STDERR start on a line of their own
  print STDERR "Note that LABEL subtype is deprecated. If possible, use ZLABEL instead (requires zref package)\n";
}

# --math-markup accepts the mnemonics OFF, WHOLE, COARSE, FINE (in any case) or the digits 0-3;
# mnemonics are translated to the corresponding numerical constant
if (defined($mathmarkup)) {
  $mathmarkup=~tr/a-z/A-Z/;
  if ( $mathmarkup eq 'OFF' ){
    $mathmarkup=OFF;
  } elsif ( $mathmarkup eq 'WHOLE' ){
    $mathmarkup=WHOLE;
  } elsif ( $mathmarkup eq 'COARSE' ){
    $mathmarkup=COARSE;
  } elsif ( $mathmarkup eq 'FINE' ){
    $mathmarkup=FINE;
  } elsif ( $mathmarkup !~ m/^[0123]$/ ) {
    die "latexdiff Illegal value: ($mathmarkup)  for option --math-markup. Possible values: OFF,WHOLE,COARSE,FINE,0-3\n";
  }
  # else use numerical value
}

# Give filterscript a default empty string
$filterscript="" unless defined($filterscript);

# setting extra preamble commands: either from the file given with --preamble
# or from the built-in definitions selected by type, subtype and floattype
if (defined($preamblefile)) {
  $latexdiffpreamble=join "\n",(extrapream($preamblefile),"");
} else {
  $latexdiffpreamble=join "\n",(extrapream($type,$subtype,$floattype),"");
}

if ( defined($driver) ) {
  # for changebar only
  $latexdiffpreamble=~s/\[dvips\]/[$driver]/sg;
}
# Setting up the command pattern lists from the built-in defaults and the command line options
{
  # Fill one list: start from the --replace-... argument if it was given and from
  # the built-in list with the given tag otherwise, then add every --append-... argument
  my $fill_list = sub {
    my ($listref,$replacement,$datatag,@additions)=@_;
    if (defined($replacement)) {
      init_regex_arr_ext($listref,$replacement);
    } else {
      init_regex_arr_data($listref, $datatag);
    }
    foreach my $addition ( @additions ) {
      init_regex_arr_ext($listref, $addition);
    }
  };

  # @SAFECMDLIST and @SAFECMDEXCL
  $fill_list->(\@SAFECMDLIST,$replacesafe,"SAFE COMMANDS",@appendsafelist);
  foreach my $exclusion ( @excludesafelist ) {
    init_regex_arr_ext(\@SAFECMDEXCL, $exclusion);
  }

  # @MBOXCMDLIST and @MBOXCMDEXCL (no built-in defaults are loaded here)
  foreach my $mboxaddition ( @appendmboxsafelist ) {
    init_regex_arr_ext(\@MBOXCMDLIST, $mboxaddition);
  }
  foreach my $mboxexclusion ( @excludemboxsafelist ) {
    init_regex_arr_ext(\@MBOXCMDEXCL, $mboxexclusion);
  }

  # @TEXTCMDLIST and @TEXTCMDEXCL
  $fill_list->(\@TEXTCMDLIST,$replacetext,"TEXT COMMANDS",@appendtextlist);
  foreach my $exclusion ( @excludetextlist ) {
    init_regex_arr_ext(\@TEXTCMDEXCL, $exclusion);
  }

  # @CONTEXT1CMDLIST ( @CONTEXT1CMDEXCL exists but is always empty )
  $fill_list->(\@CONTEXT1CMDLIST,$replacecontext1,"CONTEXT1 COMMANDS",@appendcontext1list);

  # @CONTEXT2CMDLIST ( @CONTEXT2CMDEXCL exists but is always empty )
  $fill_list->(\@CONTEXT2CMDLIST,$replacecontext2,"CONTEXT2 COMMANDS",@appendcontext2list);
}
1125
# setting configuration variables
# Every --config argument is either the name of a file with one assignment
# per line (empty lines and lines starting with # or % are ignored) or a
# comma-separated list of assignments of the form variable=value.
@config=();
foreach $config ( @configlist ) {
  if (-f $config || lc $config eq '/dev/null' ) {
    # Three-argument open with a lexical handle: the user-supplied name is always
    # treated as a plain file name (never as a mode or pipe specification) and no
    # global FILE handle is left behind.
    open(my $configfh,'<',$config) or die ("Couldn't open configuration file $config: $!");
    while (my $configline=<$configfh>) {
      chomp $configline;
      next if $configline =~ /^\s*#/ || $configline =~ /^\s*%/ || $configline =~ /^\s*$/ ;
      push (@config,$configline);
    }
    close($configfh);
  }
  else {
     push @config,split(",",$config)
  }
}
# Apply the assignments: only names already present in %CONFIG are accepted
foreach $assign ( @config ) {
  $assign=~ m/\s*(\w*)\s*=\s*(\S*)\s*$/ or die "Illegal assignment $assign in configuration list (must be variable=value)";
  exists $CONFIG{$1} or die "Unknown configuration variable $1.";
  $CONFIG{$1}=$2;
}
1150
# Collect the assignments given with --add-to-config.
# Every argument is either the name of a file with one assignment per line
# (empty lines and lines starting with # or % are ignored) or a
# comma-separated list of assignments of the form variable=value.
my @addtoconfig=();
foreach $config ( @addtoconfiglist ) {
  if (-f $config || lc $config eq '/dev/null' ) {
    # Three-argument open with a lexical handle: the user-supplied name is always
    # treated as a plain file name (never as a mode or pipe specification) and no
    # global FILE handle is left behind.
    open(my $addfh,'<',$config) or die ("Couldn't open add-to-config file $config: $!");
    while (my $addline=<$addfh>) {
      chomp $addline;
      next if $addline =~ /^\s*#/ || $addline =~ /^\s*%/ || $addline =~ /^\s*$/ ;
      push (@addtoconfig,$addline);
    }
    close($addfh);
  }
  else {
     push @addtoconfig,split(",",$config)
  }
}
1169
# initialise default lists from DATA
# Configuration variables which are still undefined at this point get their value
# from the list tagged "<NAME> CONFIG"; its entries are joined with ';'
foreach my $cfgname ( keys(%CONFIG) ) {
  next if defined $CONFIG{$cfgname};
  @dumlist=();
  init_regex_arr_data(\@dumlist,"$cfgname CONFIG");
  $CONFIG{$cfgname}=join(";",@dumlist);
}


# Assignments from --add-to-config extend the current value by a further ';'-separated entry
foreach my $addition ( @addtoconfig ) {
  ###print STDERR "assign:|$addition|\n";
  my ($cfgname,$extra) = $addition =~ m/\s*(\w*)\s*=\s*(\S*)\s*$/
    or die "Illegal assignment $addition in configuration list (must be variable=value)";
  exists $CONFIG{$cfgname} or die "Unknown configuration variable $cfgname.";
  $CONFIG{$cfgname}.=";$extra";
}
1187
# Map from hash to variables (we do this to have more concise code later, change from comma-separated list)
{
  # settings whose value is copied unchanged
  my %plain_target = (
    MINWORDSBLOCK    => \$MINWORDSBLOCK,
    ITEMCMD          => \$ITEMCMD,
    MATHREPL         => \$MATHREPL,
    MATHARRREPL      => \$MATHARRREPL,
    SCALEDELGRAPHICS => \$SCALEDELGRAPHICS,
  );
  # settings whose list string is passed through liststringtoregex() first
  my %regex_target = (
    FLOATENV        => \$FLOATENV,
    LISTENV         => \$LISTENV,
    PICTUREENV      => \$PICTUREENV,
    MATHENV         => \$MATHENV,
    MATHARRENV      => \$MATHARRENV,
    ARRENV          => \$ARRENV,
    VERBATIMENV     => \$VERBATIMENV,
    VERBATIMLINEENV => \$VERBATIMLINEENV,
    CUSTOMDIFCMD    => \$CUSTOMDIFCMD,
    COUNTERCMD      => \$COUNTERCMD,
  );

  foreach my $cfgname ( keys(%CONFIG) ) {
    if ( exists $plain_target{$cfgname} ) {
      ${ $plain_target{$cfgname} } = $CONFIG{$cfgname};
    }
    elsif ( exists $regex_target{$cfgname} ) {
      ${ $regex_target{$cfgname} } = liststringtoregex($CONFIG{$cfgname});
    }
    else {
      die "Unknown configuration variable $cfgname.";
    }
  }
}
1207
# In COARSE and WHOLE math markup mode register the \MATHBLOCK... pseudo commands
push(@MATHTEXTCMDLIST,qr/^MATHBLOCK(?:$MATHENV|$MATHARRENV|SQUAREBRACKET)$/)
  if ( $mathmarkup == COARSE || $mathmarkup == WHOLE );


# Packages named with --packages (comma-separated lists) are entered without options
foreach my $pkglist ( @packagelist ) {
  foreach my $pkgname ( split(/,/,$pkglist) ) {
    $packages{$pkgname}="";
  }
}


# Any of the --show-... options: print the requested information and stop
if ( grep { $_ } ($showconfig, $showtext, $showsafe, $showpreamble) ) {
  show_configuration();
  exit 0;
}


unless ( @ARGV == 2 ) {
  print STDERR "2 and only 2 non-option arguments required.  Write latexdiff -h to get help\n";
  exit(2);
}

# Are extra spaces between command arguments permissible?
my $extraspace = $allowspaces ? '\s*' : '';

# append context lists to text lists (as text property is implied)
push @TEXTCMDLIST, @CONTEXT1CMDLIST, @CONTEXT2CMDLIST;

push @TEXTCMDLIST, @MATHTEXTCMDLIST if $mathmarkup==COARSE;

# internal additions to SAFECMDLIST
push(@SAFECMDLIST, qr/^QLEFTBRACE$/, qr/^QRIGHTBRACE$/);
1250
# Patterns. These are used by some of the subroutines, too
# I can only define them down here because value of extraspace depends on an option
# All building blocks are kept as plain strings and are only compiled when they
# are combined into the token pattern $pat at the end of this section.

  # text without any braces
  my $pat0 = '(?:[^{}])*';
  my $pat_n = $pat0;
# if you get "undefined control sequence MATHBLOCKmath" error, increase the maximum value in this loop
  # every pass wraps the previous pattern and so admits one more level of nested {...}
  for (my $i_pat = 0; $i_pat < 20; ++$i_pat){
    $pat_n = '(?:[^{}]|\{'.$pat_n.'\}|\\\\\{|\\\\\})*';
    # Actually within the text body, quoted braces are replaced in pre-processing. The only place where
    # the last part of the pattern matters is when processing the arguments of context2cmds in the preamble
    # and these contain a \{ or \} combination, probably rare.
    # It should thus be fine to use the simpler version below.
    ###  $pat_n = '(?:[^{}]|\{'.$pat_n.'\})*';
  }

  # same construction for square brackets (optional arguments), with fewer nesting levels
  my $brat0 = '(?:[^\[\]]|\\\[|\\\])*';
  my $brat_n = $brat0;
  for (my $i_pat = 0; $i_pat < 4; ++$i_pat){
    $brat_n = '(?:[^\[\]]|\['.$brat_n.'\]|\\\[|\\\])*';
    ###  $brat_n = '(?:[^\[\]]|\['.$brat_n.'\])*';   # Version not taking into account escaped \[ and \]
  }
  # content of an argument in angle brackets (no nesting)
  my $abrat0 = '(?:[^<>])*';

  my $quotemarks = '(?:\'\')|(?:\`\`)';
  my $punct='[0.,\/\'\`:;\"\?\(\)\[\]!~\p{IsNonAsciiPunct}\p{IsNonAsciiS}]';
  my $number='-?\d*\.\d*';
  my $mathpunct='[+=<>\-\|]';
  my $and = '&';
  my $coords= '[\-.,\s\d]*';
# quoted underscore - this needs special treatment as perl treats _ as a letter (\w) but latex does not
# such that a\_b is interpreted as a{\_}b by latex but a{\_b} by perl
  my $quotedunderscore='\\\\_';
# word: sequence of letters or accents followed by letter
  my $word_ja='\p{Han}+|\p{InHiragana}+|\p{InKatakana}+';
  my $word='(?:' . $word_ja . '|(?:(?:[-\w\d*]|\\\\[\"\'\`~^][A-Za-z\*])(?!(?:' . $word_ja . ')))+)';

  # for selected commands, the number of arguments is known, and we can therefore allow spaces between command and its argument
  # Note that it is still expected that the arguments are blocks marked by parentheses rather than single characters, and that intervening comments will inhibit the association
  my $predefinedcmdoptseq01='\\\\(?:url|BibitemShut)\s*\s*(?:\{'. $pat_n . '\}\s*){1}';  # Commands with one non-optional argument
  my $predefinedcmdoptseq12='\\\\(?:href|bibfield|bibinfo)\s*(?:\['.$brat_n.'\])?\s*(?:\{'. $pat_n . '\}\s*){2}';  # Commands with one optional and two non-optional arguments
#  my $predefinedcmdoptseq11='\\\\(?:bibitem)\s*(?:\['.$brat_n.'\])?\s*(?:\{'. $pat_n . '\}\s*){1}';  # Commands with one optional and one non-optional arguments
# \bibitem in revtex styles appears to be always followed by \BibItemOpen. We bind \BibItemOpen to the bibitem (if present) in order to prevent the comparison algorithm to interpret the \BibItemOpen as an identical part of the sequence; this interpretation can lead to added and removed entries to the reference list to become mixed.
  my $predefinedbibitem='\\\\(?:bibitem)\s*(?:\['.$brat_n.'\])?\s*(?:\{'. $pat_n . '\})(?:%?\s*\\\\BibitemOpen)?';  # Commands with one optional and one non-optional arguments

  my $predefinedcmdoptseq='(?:'.$predefinedcmdoptseq12.'|'.$predefinedcmdoptseq01.'|'.$predefinedbibitem.')';

  my $cmdleftright='\\\\(?:left|right|[Bb]igg?[lrm]?|middle)\s*(?:[<>()\[\]|\.]|\\\\(?:[|{}]|\w+))';
  # command name followed by any number of <...>, [...], {...} or (...) arguments
  my $cmdoptseq='\\\\[\w\d@\*]+'.$extraspace.'(?:(?:<'.$abrat0.'>|\['.$brat_n.'\]|\{'. $pat_n . '\}|\(' . $coords .'\))'.$extraspace.')*';
  my $defseq='\\\\def\\\\[\w\d@\*]+(?:#\d+|\[#\d+\])+(?:\{'. $pat_n . '\})?';
  my $backslashnl='\\\\\n';
  my $oneletcmd='\\\\.\*?(?:\['.$brat_n.'\]|\{'. $pat_n . '\})*';
  my $math='\$(?:[^$]|\\\$)*?\$|\\\\[(](?:.|\n)*?\\\\[)]';
## the current maths command cannot cope with newline within the math expression
  my $comment='%[^\n]*\n';
  # one token: any single alternative, together with the white space following it
  # (and, at the very beginning of the text, the white space preceding it)
  my $pat=qr/(?:\A\s*)?(?:${and}|${quotemarks}|${number}|${word}|$quotedunderscore|${defseq}|$cmdleftright|${predefinedcmdoptseq}|${cmdoptseq}|${math}|${backslashnl}|${oneletcmd}|${comment}|${punct}|${mathpunct}|\{|\})\s*/ ;
1306
1307
1308
1309
# now we are done setting up and can start working
my ($oldfile, $newfile) = @ARGV;
# check for existence of input files
foreach my $inputfile ( $oldfile, $newfile ) {
  die "Input file $inputfile does not exist" unless -e $inputfile;
}


# set the labels to be included into the file
# first find out which file name is longer for correct alignment:
# the label of the shorter name is padded by the difference in length
my ($diff,$oldlabel_n_spaces,$newlabel_n_spaces);
$diff = length($newfile) - length($oldfile);
$oldlabel_n_spaces = ($diff > 0) ? $diff : 0;
$newlabel_n_spaces = ($diff < 0) ? abs($diff) : 0;

# labels given with --label take precedence; otherwise use file name and modification time
my ($oldtime,$newtime,$oldlabel,$newlabel);
$oldlabel=$labels[0];
unless (defined($oldlabel)) {
  $oldtime=localtime((stat($oldfile))[9]);
  $oldlabel="$oldfile   " . " "x($oldlabel_n_spaces) . $oldtime;
}
$newlabel=$labels[1];
unless (defined($newlabel)) {
  $newtime=localtime((stat($newfile))[9]);
  $newlabel="$newfile   " . " "x($newlabel_n_spaces) . $newtime;
}

# without --encoding the encoding is guessed from the new file
$encoding=guess_encoding($newfile) unless defined($encoding);

$encoding = "utf8" if $encoding =~ m/^utf8/i ;
if (lc($encoding) eq "utf8" ) {
  foreach my $stream ( \*STDOUT, \*STDERR ) {
    binmode($stream, ":utf8");
  }
}
1355
# filter($text)
# Runs $text through the script provided in $filterscript argument, if set,
# and returns what the script wrote to its STDOUT (the empty string if it
# wrote nothing).
# If not set, just returns $text unchanged.
# If flatten was set, defer filtering to flatten.  flatten will run the filter
# on all incoming text prior to its own processing.
# If flatten was not set, filter each of old and new once (see just below this def)
sub filter {
  my ($text)=@_;
  my $pid;
  # Start from an empty string: appending the first output line then raises no
  # "uninitialized value" warning, and a script without output yields '' instead of undef.
  my $textout="";
  if ($filterscript ne "") {
    print STDERR "Passing " . length($text) . " chars to filter script " . $filterscript . "\n" if $verbose;

    if ($ignorefilterstderr) {
        # If we need to capture and bury STDERR, use the Open3 version, and close CHLD_ERR below.
        use IPC::Open3;
        # We consume STDERR from the process, and hide it
        # NOTE(review): CHLD_ERR is never read, so a script writing a lot to STDERR could block - confirm
        $pid = open3(\*CHLD_IN, \*CHLD_OUT, \*CHLD_ERR, $filterscript) or die "open3() failed $!";
    }
    else {
        # Capture STDOUT and use as our new $text.  Allow STDERR to go to console.
        use IPC::Open2;
        $pid = open2(\*CHLD_OUT, \*CHLD_IN, $filterscript) or die "open2() failed $!";
    }
    # Send in $text
    print CHLD_IN $text."\n";  # Adding a newline just to make sure there is one.
    close CHLD_IN;
    # Wait for output and gather it up
    while (my $outline=<CHLD_OUT>) {
      $textout .= $outline;
    }
    if ($ignorefilterstderr) {
        close CHLD_ERR;       # Enable only if Open3 used above
    }
    # On the off chance a very long running and/or frequently called script is used.
    waitpid( $pid, 0 );
    $text = $textout;
    print STDERR "Received " . length($text) . " chars after filtering\n" if $verbose;
    print STDERR $text if $verbose;
  }
  return $text;
}
1397
# Read old and new file completely into memory
$old=read_file_with_encoding($oldfile,$encoding);
$new=read_file_with_encoding($newfile,$encoding);

# Without flatten the filter is applied here, once for each file.
# (With flatten, the flatten function itself passes the text through the filter)
unless (defined($flatten)) {
    ($old,$new)=(filter($old),filter($new));
}

# reset time
exetime(1);

# separate each file into preamble, body and the part following \end{document}
($oldpreamble,$oldbody,$oldpost)=splitdoc($old,'\\\\begin\{document\}','\\\\end\{document\}');
($newpreamble,$newbody,$newpost)=splitdoc($new,'\\\\begin\{document\}','\\\\end\{document\}');

if ($flatten) {
  # absolute file names are passed to flatten as the name of the file being expanded
  my $oldabs=File::Spec->rel2abs($oldfile);
  my $newabs=File::Spec->rel2abs($newfile);

  # flatten bodies (this needs the original, not yet flattened, preambles)
  $oldbody=flatten($oldbody,$oldpreamble,$oldabs,$encoding);
  $newbody=flatten($newbody,$newpreamble,$newabs,$encoding);
  # flatten preamble
  $oldpreamble=flatten($oldpreamble,$oldpreamble,$oldabs,$encoding);
  $newpreamble=flatten($newpreamble,$newpreamble,$newabs,$encoding);
}
1424
1425
1426
1427
# lines returned by preprocess_preamble; they are appended to the difference preamble further below
my @auxlines=();

# boolean variable, set further below depending on whether the ulem package is in use
my $ulem=0;

if ( length($oldpreamble) && length($newpreamble) ) {
  # pre-process preamble by looking for commands used in \maketitle (title, author, date etc commands)
  # and marking up content with latexdiff markup
  @auxlines=preprocess_preamble($oldpreamble,$newpreamble);

  # keep line-by-line copies of both preambles
  @oldpreamble = split(/\n/, $oldpreamble);
  @newpreamble = split(/\n/, $newpreamble);

  # If a command is defined in the preamble of the new file, and only uses safe commands, then it can be
  # considered to be safe (contribution S. Gouezel). Base this assessment on the new preamble
  add_safe_commands($newpreamble);

  # get a list of packages from preamble if not predefined
  if (!%packages) {
    %packages=list_packages($newpreamble);
  }
  if ( %packages && $debug ) {
    foreach my $pkgname (keys %packages) {
      print STDERR "DEBUG \\usepackage[",$packages{$pkgname},"]{",$pkgname,"}\n" ;
    }
  }
}
1449
# have to return to all processing to properly add preamble additions based on packages found

# Normalise $graphicsmarkup: keywords (in any capitalisation) are translated into the constants
# NONE, NEWONLY and BOTH, a single digit 0-2 is used as it is, anything else is an error.
if (defined($graphicsmarkup)) {
  $graphicsmarkup=~tr/a-z/A-Z/;
  if ( $graphicsmarkup eq 'OFF' or $graphicsmarkup eq 'NONE' ) {
    $graphicsmarkup=NONE;
  } elsif ( $graphicsmarkup eq 'NEWONLY' or $graphicsmarkup eq 'NEW-ONLY' ) {
    $graphicsmarkup=NEWONLY;
  } elsif ( $graphicsmarkup eq 'BOTH' ) {
    $graphicsmarkup=BOTH;
  } elsif ( $graphicsmarkup !~ m/^[012]$/ ) {
    # name the option and the values which are actually accepted by the tests above
    die "latexdiff Illegal value: ($graphicsmarkup)  for option --graphics-markup. Possible values: OFF,NONE,NEWONLY,NEW-ONLY,BOTH,0-2\n";
  }
  # else use numerical value
} else {
  # Default: no explicit setting in menu
  # highlight new graphics only if one of the graphics packages is loaded
  if ( defined $packages{"graphicx"} or defined $packages{"graphics"} ) {
    $graphicsmarkup=NEWONLY;
  } else {
    $graphicsmarkup=NONE;
  }
}
1471
# hyperref: switch the markup commands in the latexdiff preamble to their ...tex variants
# and append the preamble extension for hyperref
if (defined $packages{"hyperref"} ) {
  print STDERR "hyperref package detected.\n" if $verbose ;
  # deleted lines should not generate or appear in link names:
  $latexdiffpreamble =~ s/\{\\DIFadd\}/{\\DIFaddtex}/g;
  $latexdiffpreamble =~ s/\{\\DIFdel\}/{\\DIFdeltex}/g;
  # the empty last element makes sure that the extension is terminated by a newline
  $latexdiffpreamble .= join("\n", extrapream("HYPERREF"), "");
  if ($nolinks) {
    $latexdiffpreamble .= "\n\\hypersetup{bookmarks=false}";
  }
  ###    $latexdiffpreamble .= '%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF FOR HYPERREF PACKAGE' . "\n";
  ###    $latexdiffpreamble .= '\providecommand{\DIFadd}[1]{\texorpdfstring{\DIFaddtex{#1}}{#1}}' . "\n";
  ###    $latexdiffpreamble .= '\providecommand{\DIFdel}[1]{\texorpdfstring{\DIFdeltex{#1}}{}}' . "\n";
  ###    $latexdiffpreamble .= '%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF FOR HYPERREF PACKAGE' . "\n";
}
1486
# add commands for figure highlighting to preamble
if ($graphicsmarkup != NONE ) {
  # Check if \DIFaddbeginFL definition calls \DIFaddbegin - if so we will issue an error message that graphics highlighting
  # is not compatible with this.
  # (A more elegant solution would be to suppress the redefinitions of the \DIFaddbeginFL etc commands, but for this narrow use case
  #  I currently don't see this as an efficient use of time)
  # @matches holds the body of every definition of \DIFaddbeginFL found in the latexdiff preamble
  my @matches=( $latexdiffpreamble =~ m/command\{\\DIFaddbeginFL}\{($pat_n)}/sg ) ;
  # we look at the last one of the list to take into account possible redefinition but almost always matches should have exactly one element
  # (if there is no definition at all, there is nothing to check)
  if ( @matches && $matches[-1] =~ m/\\DIFaddbegin/ ) {
    die "Cannot combine graphics markup with float styles defining \\DIFaddbeginFL in terms of \\DIFaddbegin. Use --graphics-markup=none option or choose a different float style.";
  }
  $latexdiffpreamble .= join "\n",("\\newcommand{\\DIFscaledelfig}{$SCALEDELGRAPHICS}",extrapream("HIGHLIGHTGRAPHICS"),"");

  # only change required for highlighting both is to declare \includegraphics safe, as preamble already contains commands for deleted environment
  if ( $graphicsmarkup == BOTH ) {
    init_regex_arr_list(\@SAFECMDLIST,'includegraphics');
  }
}
1510
# ulem is in use if the latexdiff preamble requires it or if the document loads it
$ulem = ($latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{ulem\}/ || defined $packages{"ulem"});


# If listings is being used or can be found in the latexdiff search path, add to the preamble auxiliary code to enable line-by-line markup
if ( defined($packages{"listings"}) or `kpsewhich listings.sty` ne "" ) {
  my @listingpreamble=extrapream("LISTINGS");
  my @listingDIFcode=();
  # a preamble file supplied by the user need not correspond to $type, so the code
  # associated with $type is only looked up if no such file has been given
  @listingDIFcode=extrapream("-nofail","DIFCODE_" . $type) unless defined($preamblefile);
  unless (@listingDIFcode) {
    # if listingDIFcode is empty try to guess a suitable one from the preamble
    my $fallback;
    if ( $latexdiffpreamble !~ /\\RequirePackage(?:\[$brat_n\])?\{color\}/ ) {
      # fall-back solution
      $fallback="DIFCODE_BOLD";
    } elsif ( $ulem ) {
      # colour and ulem used
      $fallback="DIFCODE_UNDERLINE";
    } else {
      # only colour used
      $fallback="DIFCODE_CFONT";
    }
    @listingDIFcode=extrapream($fallback);
  }

  # now splice it in: the first line containing %DIFCODE TEMPLATE is replaced by the code,
  # any further line containing this marker is dropped
  my $replaced=0;
  my @spliced=();
  foreach my $line (@listingpreamble) {
    # only replace if this has not been done already (use short-circuit property of and)
    if (!$replaced and $line =~ s/^.*%DIFCODE TEMPLATE.*$/join("\n",@listingDIFcode)/e ) {
      $replaced=1;
      push @spliced,$line;
    } elsif ( $line !~ m/%DIFCODE TEMPLATE/ ) {
      push @spliced,$line;
    }
  }
  $latexdiffpreamble .= join "\n",(@spliced,"");
} else {
  print STDERR "WARNING: listings package not detected. Disabling mark-up in verbatim environments \n" ;
  # if listings does not exist disable line-by-line markup and treat all verbatim environments as opaque
  $VERBATIMENV = liststringtoregex($CONFIG{VERBATIMENV}.";".$CONFIG{VERBATIMLINEENV});
  $VERBATIMLINEENV = "";
}
1556
# adding begin and end marker lines to preamble
$latexdiffpreamble = "%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF\n" . $latexdiffpreamble . "%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF\n";

# and return to preamble specific processing
if ( length($oldpreamble) && length($newpreamble) ) {
  print STDERR "Differencing preamble.\n" if $verbose;

  # insert dummy first line such that line count begins with line 1 (rather than perl's line 0) - just so that line numbers inserted by linediff are correct
  unshift @newpreamble,'';
  unshift @oldpreamble,'';
  @diffpreamble = linediff(\@oldpreamble, \@newpreamble);
  # remove dummy line again
  shift @diffpreamble;

  # add latexdiff mark and the labels of old and new file after the first line (unless switched off)
  unless (defined($nolabel)) {
    splice(@diffpreamble,1,0,
           "%DIF LATEXDIFF DIFFERENCE FILE",
           "%DIF DEL $oldlabel",
           "%DIF ADD $newlabel");
  }
  if ( @auxlines ) {
    push @diffpreamble,"%DIF DELETED TITLE COMMANDS FOR MARKUP",join("\n",@auxlines);
  }
  if ( $earlylatexdiffpreamble) {
    # insert latexdiff command directly after documentclass at beginning of preamble
    # exactly one line must contain a documentclass statement which is not commented out
    my $inserted=0;
    foreach my $line (@diffpreamble) {
      $inserted++ if $line =~ s/^([^%]*\\documentclass.*)$/$1$latexdiffpreamble/;
    }
    $inserted==1 or die "Could not find documentclass statement in preamble";
  } else {
    # insert latexdiff commands at the end of preamble (default behaviour)
    push @diffpreamble,$latexdiffpreamble;
  }
  push @diffpreamble,'\begin{document}';
  if (defined $packages{"hyperref"} && $nolinks) {
    push @diffpreamble, '\begin{NoHyper}';
  }
}
elsif ( !length($oldpreamble) && !length($newpreamble) ) {
  # neither file has a preamble: nothing to be put in front of the body
  @diffpreamble=();
} else {
  print STDERR "Either both texts must have preamble or neither text must have the preamble.\n";
  exit(2);
}
1598
# Special: treat all cite commands as safe except in UNDERLINE and FONTSTRIKE mode
# (there is a conflict between citation and ulem package, see
# package documentation)
# Use post-processing
# and $packages{"apacite"}!~/natbibpapa/

# Commands of several packages clash with ulem. They are protected by an \mbox (@MBOXCMDLIST) if
# citation markup has been enabled explicitly, or if ulem is in use and citation markup has not
# been disabled. Otherwise they are simply declared safe (@SAFECMDLIST).
my $ulemclashlist = ( $enablecitmark || ( $ulem  && ! $disablecitmark )) ? \@MBOXCMDLIST : \@SAFECMDLIST;

if (defined $packages{"units"}  && $ulem ) {
  # protect inlined maths environments by surrounding with an \mbox
  # this is done to get around an incompatibility between the ulem and units package
  # where spaces in the argument to underlined or crossed-out \unit commands cause an error message
  print STDERR "units package detected at the same time as style using ulem.\n" if $verbose ;
  $MBOXINLINEMATH=1;
}

if (defined $packages{"siunitx"} ) {
  # protect SI command by surrounding them with an \mbox
  # this is done to get around an incompatibility between the ulem and siunitx package
  print STDERR "siunitx package detected.\n" if $verbose ;
  # \num and \si are safe in any case
  init_regex_arr_list(\@SAFECMDLIST,'num,si');
  init_regex_arr_list($ulemclashlist,'SI,ang,numlist,numrange,SIlist,SIrange');
}

if (defined $packages{"cleveref"} ) {
  # protect selected command by surrounding them with an \mbox
  # this is done to get around an incompatibility between ulem and cleveref package
  print STDERR "cleveref package detected.\n" if $verbose ;
  init_regex_arr_list($ulemclashlist,'[Cc]ref(?:range)?\*?,labelcref,(?:lc)?name[cC]refs?');
}

if (defined $packages{"glossaries"} ) {
  # protect selected command by surrounding them with an \mbox
  # this is done to get around an incompatibility between ulem and glossaries package
  print STDERR "glossaries package detected.\n" if $verbose ;
  # these commands are safe in any case
  init_regex_arr_list(\@SAFECMDLIST,'[gG][lL][sS](?:(?:entry)?(?:text|plural|name|symbol)|displaynumberlist|entryfirst|entryfirstplural|entrydesc|entrydescplural|entrysymbolplural|entryuser[iv][iv]?[iv]?|entrynumberlist|entrydisplaynumberlist|entrylong|entrylongpl|entryshort|entryshortpl|entryfull|entryfullpl),[gG]lossentry(?:name|desc|symbol),[aA][cC][rR](?:short|shortpl),[aA]csp?');
  init_regex_arr_list($ulemclashlist,'[gG][lL][sS](?:|pl|disp|link|first|firstplural|desc|user[iv][iv]?[iv]?),[aA][cC][rR](?:long|longpl|full|fullpl),[aA][cC][lfp]?[lfp]?');
}

if (defined $packages{"chemformula"} or defined $packages{"chemmacros"} ) {
  print STDERR "chemformula package detected.\n" if $verbose ;
  # \ch is safe in text, but not within maths
  init_regex_arr_list(\@SAFECMDLIST,'ch');
  push(@UNSAFEMATHCMD,'ch');
  # The next command would be needed to allow highlighting the interior of \ch commands in math environments
  # but the redefinitions in chemformula are too deep to make this viable
  # push(@MATHTEXTCMDLIST,'ch');
}

if (defined $packages{"mhchem"} ) {
  print STDERR "mhchem package detected.\n" if $verbose ;
  # \ce is safe in text; \ce and \cee are not safe within maths
  init_regex_arr_list(\@SAFECMDLIST,'ce');
  push(@UNSAFEMATHCMD,'ce','cee');
  # The next command would be needed to allow highlighting the interior of \cee commands in math environments
  # but the redefinitions in chemformula are too deep to make this viable
  # push(@MATHTEXTCMDLIST,'cee');
}
1669
1670
# $citpat: pattern matching the names of the citation commands of the citation package in use
my ( $citpat);

if ( defined $packages{"apacite"}  ) {
  print STDERR "apacite package detected.\n" if $verbose ;
  $citpat='(?:mask)?(?:full|short|no)?cite(?:A|author|year|meta)?(?:NP)?';
} elsif (defined $packages{"biblatex"}) {
  print STDERR "biblatex package detected.\n" if $verbose ;
  $citpat='(?:[cC]ites?|(?:[pP]aren|foot|[Tt]ext|[sS]mart|super)cites?\*?|footnotecitetex)';
} else {
  # citation command pattern for all other citation schemes
  $citpat='(?:cite\w*|nocite)';
}

if ( ! $ulem ) {
  # modes not using ulem: citation is safe
  push (@SAFECMDLIST, $citpat);
} else {
  ### Experimental: disable text and emph commands
  push(@SAFECMDEXCL, qr/^emph$/, qr/^text..$/);
  # replace \cite{..} by \mbox{\cite{..}} in added or deleted blocks in post-processing,
  # except for subtypes COLOR and DVIPSCOL, or if citation markup is disabled.
  # (The decision is taken before anything is added to the list: removing the last element again
  #  afterwards would discard an unrelated entry whenever the pattern had not been added at all)
  my $colorsubtype = ( uc($subtype) eq "COLOR" or uc($subtype) eq "DVIPSCOL" );
  push(@MBOXCMDLIST,$citpat) unless ( $disablecitmark or $colorsubtype );
}
# citation markup requested explicitly: always protect citation commands
push(@MBOXCMDLIST,$citpat) if $enablecitmark ;
1699
1700
# amsmath (or one of the AMS classes) present: use align* as value of MATHARRREPL
if ( grep { defined $packages{$_} } ("amsmath","amsart","amsbook") ) {
  print STDERR "amsmath package detected.\n" if $verbose ;
  $MATHARRREPL='align*';
}

# add commands in MBOXCMDLIST to SAFECMDLIST
foreach $mboxcmd ( @MBOXCMDLIST ) {
  init_regex_arr_list(\@SAFECMDLIST, $mboxcmd);
}

# check if \label is in SAFECMDLIST, and if yes replace "label" in $LABELCMD by something that never matches (we hope!)
$LABELCMD=~ s/label/NEVERMATCHLABEL/ if iscmd("label",\@SAFECMDLIST,\@SAFECMDEXCL);
1715
1716
1717
print STDERR "Preprocessing body.  " if $verbose;
preprocess($oldbody,$newbody);


# run difference algorithm and merge result into a single string
@diffbody=bodydiff($oldbody, $newbody);
$diffbo=join("",@diffbody);
writedebugfile($diffbo,"bodydiff");

if ($verbose) {
  print STDERR "(",exetime()," s)\n","Postprocessing body. \n";
}
postprocess($diffbo);

# add visible labels
if (defined($visiblelabel)) {
  # Give information right at the beginning of the body, i.e. right after \begin{document}
  # (or at the beginning of the text for files without preamble)
  $diffbo = "\\begin{verbatim}LATEXDIFF comparison\nOld: $oldlabel\nNew: $newlabel\\end{verbatim}\n" . $diffbo;
}

# assemble output: preamble, body, and closing commands matching those opened in the preamble
$diffall = join("\n",@diffpreamble) . $diffbo;
$diffall .= "\\end{NoHyper}\n" if (defined $packages{"hyperref"} && $nolinks);
if ( length($newpreamble) ) {
  $diffall .= "\\end{document}$newpost";
}

# for all encodings other than utf8 and ascii convert the output explicitly and write it as raw bytes
unless ( lc($encoding) eq "utf8" || lc($encoding) eq "ascii" ) {
  print STDERR "Encoding output file to $encoding\n" if $verbose;
  $diffall=Encode::encode($encoding,$diffall);
  binmode STDOUT;
}
print $diffall;


if ($verbose) {
  print STDERR "(",exetime()," s)\n","Done.\n";
}
1752
1753
# liststringtoregex(liststring)
# converts a string holding a semi-colon separated list into a regular expression (as string)
# matching any of the list elements. Elements consisting of whitespace only are ignored.
# Returns the empty string if no element is left.
sub liststringtoregex {
  my ($liststring)=@_;
  my @alternatives=();
  foreach my $item (split(";",$liststring)) {
    push @alternatives,"(?:$item)" if $item =~ /\S/;
  }
  return "" unless @alternatives;
  # each element is wrapped in its own group, and so is the alternation as a whole
  return '(?:' . join('|',@alternatives) . ')';
}
1766
# show_configuration
# prints the parts of the configuration selected by the flags $showpreamble, $showsafe,
# $showtext and $showconfig to standard output.
# note that this is not encapsulated but uses variables from the main program
# It is provided for convenience because in the future it is planned to allow output
# to be modified based on what packages are read etc - this works only if the input files are actually read
# whether or not additional files are provided
sub show_configuration {
  my ($heading,$list,$name,$value);

  if ($showpreamble) {
    print "\nPreamble commands:\n";
    print $latexdiffpreamble ;
  }

  if ($showsafe) {
    # pairs of heading and command list
    my @safesections=(
      ["\nsafecmd: Commands safe within scope of $ADDOPEN $ADDCLOSE and $DELOPEN $DELCLOSE (unless excluded):\n", \@SAFECMDLIST],
      ["\nsafecmd-exlude: Commands not safe within scope of $ADDOPEN $ADDCLOSE and $DELOPEN $DELCLOSE :\n", \@SAFECMDEXCL],
      ["\nmboxsafecmd:  Commands safe only if they are surrounded by \\mbox command:\n", \@MBOXCMDLIST],
      ["\nnmboxsafecmd: Commands not safe:\n", \@MBOXCMDEXCL],
    );
    foreach my $section (@safesections) {
      ($heading,$list)=@$section;
      print $heading;
      print_regex_arr(@$list);
    }
  }

  if ($showtext) {
    # pairs of heading and command list
    my @textsections=(
      ["\nCommands with last argument textual (unless excluded) and safe in every context:\n", \@TEXTCMDLIST],
      ["\nContext1 commands (last argument textual, command will be disabled in deleted passages, last argument will be shown as plain text):\n", \@CONTEXT1CMDLIST],
      ["\nContext2 commands (last argument textual, command and its argument will be disabled in deleted passages):\n", \@CONTEXT2CMDLIST],
      ["\nExclude list of Commands with last argument not textual (overrides patterns above):\n", \@TEXTCMDEXCL],
    );
    foreach my $section (@textsections) {
      ($heading,$list)=@$section;
      print $heading;
      print_regex_arr(@$list);
    }
  }


  if ($showconfig) {
    print "Configuration variables:\n";
    # pairs of name and current value, in the order in which they are listed
    my @settings=(
      ["ARRENV",$ARRENV],
      ["COUNTERCMD",$COUNTERCMD],
      ["FLOATENV",$FLOATENV],
      ["ITEMCMD",$ITEMCMD],
      ["LISTENV",$LISTENV],
      ["MATHARRENV",$MATHARRENV],
      ["MATHARRREPL",$MATHARRREPL],
      ["MATHENV",$MATHENV],
      ["MATHREPL",$MATHREPL],
      ["MINWORDSBLOCK",$MINWORDSBLOCK],
      ["PICTUREENV",$PICTUREENV],
      ["SCALEDELGRAPHICS",$SCALEDELGRAPHICS],
      ["VERBATIMENV",$VERBATIMENV],
      ["VERBATIMLINEENV",$VERBATIMLINEENV],
      ["CUSTOMDIFCMD",$CUSTOMDIFCMD],
    );
    foreach my $setting (@settings) {
      ($name,$value)=@$setting;
      print "$name=$value\n";
    }
  }
}
1820
1821
1822
## guess_encoding(filename)
## scans the beginning of filename for a call of the inputenc package
## (\usepackage[encoding]{inputenc}). Comment lines are skipped and do not count;
## scanning stops once more than 20 other lines have been read without success.
## if found, return the option of this package (encoding), otherwise return utf8
## Dies if the file cannot be opened.
sub guess_encoding {
  my ($filename)=@_;
  my $nlines=0;
  # three-argument open: the file name is used literally, even if it begins or ends with
  # blanks or with characters which have a special meaning for two-argument open
  open (my $fh, "<", $filename) or die("Couldn't open $filename: $!");
  # read into a lexical variable, such that $_ of the caller is left alone
  while (my $line=<$fh>) {
    next if $line =~ /^\s*%/;    # skip comment lines
    if ($line =~ m/\\usepackage\[(\w*?)\]\{inputenc\}/) {
      my $enc=$1;
      close($fh);
      return($enc);
    }
    last if (++$nlines > 20 ); # give up after 20 non-comment lines have been exceeded
  }
  close($fh);
  ### return("ascii");
  return("utf8");
}
1843
1844
# read_file_with_encoding($filename,$encoding)
# reads the whole file and returns its content as a single string.
#   utf8:  the file is read through the :utf8 layer
#   ascii: the file is read as it is
#   any other encoding: the file is read as it is and then converted with Encode::decode
# On linux, DOS line ends (\r\n) are converted to \n.
# Dies if the file cannot be opened.
sub read_file_with_encoding {
  my ($filename, $encoding) = @_;
  my ($output);
  my $enc=lc($encoding);

  # Always use three-argument open (file name is used literally) and a lexical file handle
  # (a handle shared by all calls is fragile, as this function is also called while flatten recurses)
  my $mode=( $enc eq "utf8" ? "<:utf8" : "<" );
  open (my $fh, $mode, $filename) or die("Couldn't open $filename: $!");
  {
    local $/ ; # locally set record operator to undefined, ie. enable whole-file mode
    $output=<$fh>;
  }
  close($fh);

  if ( $enc ne "utf8" and $enc ne "ascii" ) {
    require Encode;
    print STDERR "Converting $filename from $encoding to utf8\n" if $verbose;
    $output=Encode::decode($encoding,$output);
  }
  if ($^O eq "linux" ) {
    $output =~ s/\r\n/\n/g ;
  }
  return $output;
}
1871
1872## %packages=list_packages(@preamble)
1873## scans the arguments for \documentclass,\RequirePackage and \usepackage statements and constructs a hash
1874## whose keys are the included packages, and whose values are the associated optional arguments
1875#sub list_packages {
1876#  my (@preamble)=@_;
1877#  my %packages=();
1878#  foreach $line ( @preamble ) {
1879#    # get rid of comments
1880#    $line=~s/(?<!\\)%.*$// ;
1881#    if ( $line =~ m/\\(?:documentclass|usepackage|RequirePackage)(?:\[(.+?)\])?\{(.*?)\}/ ) {
1882##      print STDERR "Found something: |$line|\n" if $debug;
1883#      if (defined($1)) {
1884#	$packages{$2}=$1;
1885#      } else {
1886#	$packages{$2}="";
1887#      }
1888#    }
1889#  }
1890#  return (%packages);
1891#}
1892
1893
# %packages=list_packages($preamble)
# scans the arguments for \documentclass,\RequirePackage and \usepackage statements and constructs a hash
# whose keys are the included packages, and whose values are the associated optional arguments
# (empty string if there is no optional argument)
# if argument of \usepackage or \RequirePackage is comma separated list, treat as different packages
# The options of \documentclass are entered as keys as well (with empty value, unless a package of
# this name has already been found).
sub list_packages {
  my ($preamble)=@_;
  my %packages=();
  my ($options,$namelist);

  # remove comments
  $preamble=~s/(?<!\\)%.*$//mg ;

  while ( $preamble =~  m/\\(?:documentclass|usepackage|RequirePackage)(?:\[($brat_n)\])?\{(.*?)\}/gs ) {
    $options=( defined($1) ? $1 : "" );
    $namelist=$2;
    foreach my $pkg ( split /,/,$namelist ) {
      # remove space and newline characters, such that lists like {amsmath, graphicx} or lists
      # spread over several lines result in the plain package names
      $pkg =~ s/\s//g ;
      next if $pkg eq "";
      $packages{$pkg}=$options;
    }
  }

  # sometimes, class options are defined in such a way that they imply the loading and/or presence of a package
  # so we also treat all class options as 'packages.
  if ( $preamble =~  m/\\documentclass\s*\[($brat_n)\]\s*\{.*?\}/s ) {
     foreach my $opt ( split /,/,$1 ) {
       $opt =~ s/\s//g ;   # remove space and newline characters
       next if $opt eq "";
       $packages{$opt}="" unless exists($packages{$opt});
     }
  }
  return (%packages);
}
1928
# Subroutine add_safe_commands modified from version provided by S. Gouezel
# add_safe_commands($preamble)
# scans the argument for \newcommand (also \renewcommand, \providecommand) and \DeclareMathOperator,
# and adds the created commands which are clearly safe to @SAFECMDLIST
#  - commands created by \DeclareMathOperator are always added
#  - commands created by \newcommand etc. are added only if their definition does not contain \\
#    and if all commands used in the definition are safe themselves
# Commands which are already safe, or which match the exclude list @SAFECMDEXCL, are left alone.
sub add_safe_commands {
  my ($preamble)=@_;

  # get rid of comments
  $preamble=~s/(?<!\\)%.*$//mg ;

  my $to_test = "";
  # test for \DeclareMathOperator{\foo}{myoperator}
  # (the backslash has to be doubled: \D on its own would stand for any character which is not a digit)
  while ( $preamble =~ m/\\DeclareMathOperator\s*\*?\{\\(\w*?)\}/osg) {
    $to_test=$1;
    if ($to_test ne "" and not iscmd($to_test,\@SAFECMDLIST,\@SAFECMDEXCL) and not iscmd($to_test, \@SAFECMDEXCL, [])) {
      # one should add $to_test to the list of safe commands.
      init_regex_arr_list(\@SAFECMDLIST, $to_test);
      print STDERR "Adding $to_test to the list of safe commands\n" if $verbose;
    }
  }

  # test for \newcommand{\foo}[n]{definition}
  # (the literal opening brace is escaped; recent perl versions object to unescaped braces in patterns)
  while ( $preamble =~ m/\\(?:new|renew|provide)command\s*\{\\(\w*)\}(?:|\[\d*\])\s*\{(${pat_n})\}/osg ) {
    my $maybe_to_test  = $1;
    my $should_be_safe = $2;
    print STDERR "DEBUG Checking new command: maybe_to_test, should_be_safe: $maybe_to_test $should_be_safe\n" if $debug;
    # test if all latex commands inside it are safe
    my $success = 1;
    if ($should_be_safe =~ m/\\\\/) {
      # definitions containing \\ are never considered safe
      $success = 0;
    } else {
      while ($should_be_safe =~ m/\\(\w+)/g) {
        my $usedcmd = $1;
        $success = 0 unless iscmd($usedcmd,\@SAFECMDLIST,\@SAFECMDEXCL);
      }
    }
    if ($success) {
      $to_test = $maybe_to_test;
      if (  not iscmd($to_test,\@SAFECMDLIST,\@SAFECMDEXCL) and not iscmd($to_test, \@SAFECMDEXCL, [])) {
        # one should add $to_test to the list of safe commands.
        init_regex_arr_list(\@SAFECMDLIST, $to_test);
        print STDERR "Adding $to_test to the list of safe commands\n" if $verbose;
      }
    }
  }
}
1977
1978
# helper function for flatten
# remove_endinput($text)
# removes the first \endinput found at the beginning of a line together with everything
# following it. The argument itself is modified, and the result is returned as well.
# If \endinput is not at the beginning of the line, nothing will be removed. It is assumed
# that this case is most common when \endinput is part of a conditional clause. The file
# will only be processed correctly if the conditional is always false,
# i.e. \endinput not actually reached
sub remove_endinput {
  # the loop variable is an alias of the first argument, so the substitution acts on the variable of the caller
  foreach my $content ($_[0]) {
    $content =~ s/^\\endinput.*\Z//ms ;
  }
  return($_[0]);
}
1992
1993# flatten($text,$preamble,$filename,$encoding)
1994# expands \input and \include commands within text
1995# expands \bibliography command with corresponding bbl file if available
1996# expands \subfile command (from subfiles package - not part of standard text distribution)
1997# preamble is scanned for includeonly commands
1998# encoding is the encoding
1999sub flatten {
2000  my ($text,$preamble,$filename,$encoding)=@_;
2001  my ($includeonly,$dirname,$fname,$newpage,$fullfile,$filecontent,$replacement,$begline,$inputcmd,$bblfile,$subfile,$command,$verbenv,$verboptions,$ignore,$fileonly);
2002  my ($subpreamble,$subbody,$subpost);
2003  my ($subdir,$subdirfull,$importfilepath);
2004  require File::Basename ;
2005  ###  require File::Spec ;    # now this is needed even if flatten option not given
2006  $filename = File::Spec->rel2abs( $filename ) ;
2007  ($ignore, $dirname, $fileonly) = File::Spec->splitpath($filename) ;
2008  $bblfile = $filename;
2009  $bblfile=~s/\.tex$//;
2010  $bblfile.=".bbl";
2011
2012  if ( ($includeonly) = ($preamble =~ m/\\includeonly\{(.*?)\}/ ) ) {
2013    $includeonly =~ s/,/|/g;
2014  } else {
2015    $includeonly = '.*?';
2016  }
2017
2018  print STDERR "DEBUG: includeonly $includeonly\n" if $debug;
2019
2020
2021  # Run through filter, to let filterscript have a pass if it was set
2022  $text = filter($text);
2023
2024  # Recursively replace \\import and \\subimport files
2025  $text =~ s/(^(?:[^%\n]|\\%)*)(\\subimport\{(.*?)\}|\\import\{(.*?)\})(?:[\s]*)\{(.*?)\}/{
2026          # $1 is begline
2027          # $3 is directory if subimport
2028          # $4 is directory if import
2029          # $5 is filename
2030          $begline = (defined($1)? $1 : "");
2031          $subdir = $3 if defined($3);
2032          $subdir = $4 if defined($4);
2033          $fname = $5;
2034          $fname .= ".tex" unless $fname =~ m|\.\w{3,4}$|;
2035          print STDERR "DEBUG begline:", $begline, "\n" if $debug;
2036          print STDERR "DEBUG", (defined($3)? "subimport_file:" : "import_file:"), $subdir, "\n" if $debug;
2037          print STDERR "DEBUG file:", $fname, "\n" if $debug;
2038
2039          # subimport appends $subdir to the current $dirname.  import replaces it with an absolute path.
2040          $subdirfull = (defined($3) ? File::Spec->catdir($dirname,$subdir) : $subdir);
2041
2042          $importfilepath = File::Spec->catfile($subdirfull, $fname);
2043
2044          print STDERR "importing importfilepath:", $importfilepath,"\n" if $verbose;
2045          if ( -f $importfilepath ) {
2046              # If file exists, replace input or include command with expanded input
2047              $replacement=flatten(read_file_with_encoding($importfilepath, $encoding), $preamble,$importfilepath,$encoding) or die "Could not open file ",$fullfile,": $!";
2048          } else {
2049              # if file does not exist, do not expand include or input command (do not warn if fname contains #[0-9] as it is then likely part of a command definition
2050              # and is not meant to be expanded directly
2051              print STDERR "WARNING: Could not find included file ",$importfilepath,". I will continue but not expand |$2|\n";
2052              $replacement=(defined($3)? "\\subimport" : "\\import");
2053              $replacement .= "{$subdir}{$fname} % Processed";
2054          }
2055          "$begline$replacement";
2056  }/exgm;
2057
2058  # recursively replace \\input and \\include files
2059  $text =~ s/(^(?:[^%\n]|\\%)*)(\\input\{(.*?)\}|\\include\{(${includeonly}(?:\.tex)?)\})/{
2060	    $begline=(defined($1)? $1 : "") ;
2061	    $inputcmd=$2;
2062	    $fname = $3 if defined($3) ;
2063	    $fname = $4 if defined($4) ;
2064            $newpage=(defined($4)? " \\newpage " : "") ;
2065            #      # add tex extension unless there is a three or four letter extension already
2066            $fname .= ".tex" unless $fname =~ m|\.\w{3,4}$|;
2067            $fullfile = File::Spec->catfile($dirname,$fname);
2068            print STDERR "DEBUG Beg of line match |$1|\n" if defined($1) && $debug ;
2069            print STDERR "Include file $fname\n" if $verbose;
2070            print STDERR "DEBUG looking for file ",$fullfile, "\n" if $debug;
2071            # content of file becomes replacement value (use recursion), add \newpage if the command was include
2072            if ( -f $fullfile ) {
2073	      # If file exists, replace input or include command with expanded input
2074	      $replacement=flatten(read_file_with_encoding($fullfile, $encoding), $preamble,$filename,$encoding) or die "Could not open file ",$fullfile,": $!";
2075	      $replacement = remove_endinput($replacement);
2076	      # \include always starts a new page; use explicit \newpage command to simulate this
2077	    } else {
2078	      # if file does not exist, do not expand include or input command (do not warn if fname contains #[0-9] as it is then likely part of a command definition
2079              # and is not meant to be expanded directly
2080	      print STDERR "WARNING: Could not find included file ",$fullfile,". I will continue but not expand |$inputcmd|\n" unless $fname =~ m(#[0-9]) ;
2081	      $replacement = $inputcmd ;   # i.e. just the original command again -> make no change file does not exist
2082	      $newpage="";
2083	    }
2084	    "$begline$newpage$replacement$newpage";
2085          }/exgm;
2086
2087  # replace bibliography with bbl file if it exists
2088  $text=~s/(^(?:[^%\n]|\\%)*)\\bibliography\{(.*?)\}/{
2089           if ( -f $bblfile ){
2090	     $replacement=read_file_with_encoding(File::Spec->catfile($bblfile), $encoding);
2091	   } else {
2092	     warn "Bibliography file $bblfile cannot be found. No flattening of \\bibliography done. Run bibtex on old and new files first";
2093	     $replacement="\\bibliography{$2}";
2094	   }
2095	   $begline=(defined($1)? $1 : "") ;
2096	   "$begline$replacement";
2097  }/exgm;
2098
2099  # replace subfile with contents (subfile package)
2100  $text=~s/(^(?:[^%\n]|\\%)*)\\subfile\{(.*?)\}/{
2101           $begline=(defined($1)? $1 : "") ;
2102     	   $fname = $2;
2103           #      # add tex extension unless there is a three or four letter extension already
2104           $fname .= ".tex" unless $fname =~ m|\.\w{3,4}|;
2105           print STDERR "Include file as subfile $fname\n" if $verbose;
2106           # content of file becomes replacement value (use recursion)
2107           # now strip away everything outside and including \begin{document} and \end{document} pair#
2108	   #             # note: no checking for comments is made
2109           $fullfile=File::Spec->catfile($dirname,$fname);
2110           if ( -f $fullfile) {
2111	     # if file exists, expand \subfile command by contents of file
2112	     $subfile=read_file_with_encoding($fullfile,$encoding) or die "Could not open included subfile ",$fullfile,": $!";
2113	     ($subpreamble,$subbody,$subpost)=splitdoc($subfile,'\\\\begin\{document\}','\\\\end\{document\}');
2114	     ###           $subfile=~s|^.*\\begin{document}||s;
2115	     ###           $subfile=~s|\\end{document}.*$||s;
2116	     $replacement=flatten($subbody, $preamble,$filename,$encoding);
2117	     ### $replacement = remove_endinput($replacement);
2118	   } else {
2119	      # if file does not exist, do not expand subfile
2120	      print STDERR "WARNING: Could not find subfile ",$fullfile,". I will continue but not expand |$2|\n" unless $fname =~ m(#[0-9]) ;
2121	      $replacement = "\\subfile\{$2\}" ;   # i.e. just the original command again -> make no change file does not exist
2122	    }
2123
2124	   "$begline$replacement";
2125  }/exgm;
2126
  # replace \verbatiminput and \lstinputlisting
2128  $text=~s/(^(?:[^%\n]|\\%)*)\\(verbatiminput\*?|lstinputlisting)$extraspace(\[$brat_n\])?$extraspace\{(.*?)\}/{
2129     $begline=(defined($1)? $1 : "") ;
2130     $command = $2 ;
2131     $fname = $4 ;
2132     $verboptions = defined($3)? $3 : "" ;
2133     if ($command eq 'verbatiminput' ) {
2134       $verbenv = "verbatim" ;
2135     } elsif ($command eq 'verbatiminput*' ) {
2136       $verbenv = "verbatim*" ;
2137     } elsif ($command eq 'lstinputlisting' ) {
2138       $verbenv = "lstlisting" ;
2139     } else {
2140       die "Internal errorL Unexpected verbatim input type $command.\n";
2141     }
2142     print STDERR "DEBUG Beg of line match |$begline|\n" if $debug ;
2143     print STDERR "Include file $fname  verbatim\n" if $verbose;
2144     print STDERR "DEBUG looking for file ",File::Spec->catfile($dirname,$fname), "\n" if $debug;
2145     # content of file becomes replacement value (do not use recursion), add \newpage if the command was include
2146     ###$replacement=read_file_with_encoding(File::Spec->catfile($dirname,$fname), $encoding) or die "Couldn't find file ",File::Spec->catfile($dirname,$fname),": $!";
2147     $replacement=read_file_with_encoding(File::Spec->catfile($dirname,$fname), $encoding) or die "Couldn't find file ",File::Spec->catfile($dirname,$fname),": $!";
2148     # Add a new line if it not already there (note that the matching operator needs to use different delimiters, as we are still inside an outer scope that takes precedence
2149     $replacement .= "\n" unless  $replacement =~ m(\n$)  ;
2150     "$begline\\begin{$verbenv}$verboptions\n$replacement\\end{$verbenv}\n";
2151    }/exgm;
2152
2153  return($text);
2154}
2155
2156
# print_regex_arr(@arr)
# Prints the elements of @arr on a single line, separated by single spaces and
# terminated by a newline, to the currently selected output handle (normally stdout).
# Anchored, precompiled regexes such as qr/^ref$/ are shown as the bare pattern (ref):
# the wrapper which perl adds when a regex is turned into a string and the ^,$ marks
# are removed.  Both the wrapper of old perl versions, (?-xism:^ref$), and the caret
# form used since perl 5.14, e.g. (?^:^ref$) or (?^u:^ref$), are recognised.
# Elements which do not have this form are printed unchanged.
sub print_regex_arr {
  my $dumstring = join(" ",@_);
  # remove wrapper and ^,$ marks before output; the flag letters following the caret
  # (if any) are dropped together with the wrapper
  $dumstring =~ s/\(\?(?:-xism|\^[a-z]*):\^(.*?)\$\)/$1/g;
  print $dumstring,"\n";
}
2165
2166
# @lines=extrapream($type,...)
# Collects preamble lines for every $type of the argument list and returns them as
# one list (lines without trailing newline).
# - if $type is the name of an existing file (or /dev/null), the lines are read from this file;
#   " %DIF PREAMBLE" is appended to every line which does not contain "%DIF PREAMBLE" already
# - otherwise the appendix (end of file after __END__ token, read via the DATA handle) is
#   screened for line "%DIF $TYPE" and everything from this line up to (but excluding) line
#   '%DIF END $TYPE' is copied, with " %DIF PREAMBLE" appended to every line
#   (where $TYPE is upcased version of $type)
# extrapream('-nofail',$type) will---instead of failing---simply return nothing if
# it does not find the matching line in the appendix (do not use -nofail option with multiple types!)
# Without -nofail an unknown type terminates the program with exit code 2.
# Dies if a preamble file exists but cannot be opened.
sub extrapream {
  my @types=@_;
  my $nofail=0;
  ###my @retval=("%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF") ;
  my @retval=();

  foreach my $arg ( @types ) {
    if ( $arg eq '-nofail' ) {
      $nofail=1;
      next;
    }
    my $type=$arg;
    if ( -f $type || lc($type) eq '/dev/null' ) {
      # three-argument open: the file name is used literally. The two-argument form would strip
      # leading/trailing white space and interpret characters like > or | in a name which has
      # just passed the -f test
      open (my $fh,'<',$type) or die "Cannot open preamble file $type: $!";
      print STDERR "Reading preamble file $type\n" if $verbose ;
      # read into a lexical variable so that $_ of the caller is left alone
      while (my $line=<$fh>) {
        chomp $line;
        if ( $line =~ m/%DIF PREAMBLE/ ) {
          push (@retval,$line);
        } else {
          push (@retval,"$line %DIF PREAMBLE");
        }
      }
      close($fh);
    } else {    # not (-f $type)
      my $copy=0;
      $type=uc($type);   # upcase argument
      print STDERR "Preamble Internal Type $type\n" if $verbose;
      # save filehandle position (before first read this points to line after __END__)
      # but seek DATA,0,0 resets it to the beginning of the file
      # see https://stackoverflow.com/questions/4459601/how-can-i-use-data-twice
      my $data_start = tell DATA;
      # \Q..\E: the type is matched literally, such that a type containing regex special
      # characters leads to the "not implemented" message rather than to a regex error
      while (my $line=<DATA>) {
        if ( $line =~ m/^%DIF \Q$type\E/ ) {
          $copy=1;
        } elsif ( $line =~ m/^%DIF END \Q$type\E/ ) {
          last;
        }
        chomp $line;
        push (@retval,"$line %DIF PREAMBLE") if $copy;
      }
      if ( $copy == 0 && !$nofail ) {
        print STDERR "\nPreamble style $type not implemented.\n";
        print STDERR "Write latexdiff -h to get help with available styles\n";
        exit(2);
      }
      seek DATA,$data_start,0;    # rewind DATA handle to beginning of data record
    }
  }
  ###push (@retval,"%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF")  ;
  return @retval;
}
2230
2231
# ($part1,$part2,$part3)=splitdoc($text,$word1,$word2)
# Cuts $text into three pieces at the markers $word1 and $word2 (both are regex fragments):
#   $part1  everything before $word1
#   $part2  everything between $word1 and $word2
#   $part3  everything after $word2
# The markers themselves are not part of any piece. A marker only counts if there is a
# beginning of line before it with no % character between that line start and the marker.
# If $word1 is not found, $part1 and $part3 are empty and $part2 is $text; however, if
# $word2 is found in this case the routine dies.
# The routine also dies if $word1 is found but is not followed by $word2.

# NB the offsets of the match are taken from @- and @+, which avoids $` and $'
sub splitdoc {
  my ($text,$word1,$word2)=@_;

  # no opening marker: the whole text is the middle part (unless closing marker is present)
  unless ( $text =~ m/(^[^%]*)($word1)/m ) {
    die "$word2 present but not $word1." if ( $text =~ m/(^[^%]*)$word2/ms );
    return ("",$text,"");
  }

  # $-[2] is the offset where the marker begins, $+[0] the offset just after the marker
  my $head=substr($text,0,$-[2]);
  my $tail=substr($text,$+[0]);

  # the closing marker is only looked for behind the opening marker
  $tail =~ m/(^[^%]*)($word2)/m
    or die "$word1 and $word2 not in the correct order or not present as a pair." ;

  return ($head,substr($tail,0,$-[2]),substr($tail,$+[0]));
}
2262
2263
2264
2265
2266
# @retwords=bodydiff($old,$new)
# Compares the strings $old and $new: both are split into latex tokens (splitlatex),
# the token lists are preprocessed by pass1, and pass2 merges them into one list of
# marked-up tokens, which is returned.
# With $verbose set, progress messages are printed to STDERR.
# With $debug set, the token lists are dumped to files in the current directory:
#   latexdiff.debug.tokenold,  latexdiff.debug.tokennew     (before pass1)
#   latexdiff.debug.tokenold2, latexdiff.debug.tokennew2    (after pass1)
sub bodydiff {
  my ($oldwords, $newwords) = @_;
  my @retwords;

  print STDERR "(",exetime()," s)\n","Splitting into latex tokens \n" if $verbose;
  print STDERR "Parsing $oldfile \n" if $verbose;
  my @oldwords = splitlatex($oldwords);
  print STDERR "Parsing $newfile \n" if $verbose;
  my @newwords = splitlatex($newwords);

  if ( $debug ) {
    _dump_debug_tokens("latexdiff.debug.tokenold",@oldwords);
    _dump_debug_tokens("latexdiff.debug.tokennew",@newwords);
  }

  print STDERR "(",exetime()," s)\n","Pass 1: Expanding text commands and merging isolated identities with changed blocks  " if $verbose;
  pass1(\@oldwords, \@newwords);


  print STDERR "(",exetime()," s)\n","Pass 2: inserting DIF tokens and mark up.  " if $verbose;
  if ( $debug ) {
    _dump_debug_tokens("latexdiff.debug.tokenold2",@oldwords);
    _dump_debug_tokens("latexdiff.debug.tokennew2",@newwords);
  }

  @retwords=pass2(\@oldwords, \@newwords);

  return(@retwords);
}

# _dump_debug_tokens($filename,@tokens)
# Writes @tokens to file $filename, separated by "***\n" (debugging aid for bodydiff).
# The dump is only a diagnostic: if the file cannot be written a warning is printed
# and processing continues.
sub _dump_debug_tokens {
  my ($filename,@tokens)=@_;
  my $fh;
  unless ( open($fh,">",$filename) ) {
    print STDERR "WARNING: Cannot write debug file $filename: $!\n";
    return;
  }
  print $fh join("***\n",@tokens);
  close($fh) or print STDERR "WARNING: Error while closing debug file $filename: $!\n";
}
2305
2306
2307
2308
# @words=splitlatex($string)
# split string according to latex rules (the tokens are defined by the global pattern $pat)
# Each element of words is either
# a word (including trailing spaces and punctuation)
# a latex command
# if there is white space in the beginning return that as first token
# An empty string results in an empty list.
# If the tokens found by $pat do not cover the whole string, a warning is printed
# (unless $ignorewarnings is set) and the string is parsed again token by token; the
# characters skipped by $pat (between tokens or after the last token) are inserted
# as additional tokens, such that no part of the input is lost.
sub splitlatex {
  my ($inputstring) = @_ ;
  my $string=$inputstring ;
  # if input is empty, return empty list
  length($string)>0 or return ();
  $string=~s/^(\s*)//s;
  my $leadin=$1;
  length($string)>0 or return ($leadin);

  my @retval=($string =~ m/$pat/osg);

  if (length($string) != length(join("",@retval))) {
    print STDERR "\nWARNING: Inconsistency in length of input string and parsed string:\n     This often indicates faulty or non-standard latex code.\n     In many cases you can ignore this and the following warning messages.\n Note that character numbers in the following are counted beginning after \\begin{document} and are only approximate." unless $ignorewarnings;
    print STDERR "DEBUG Original length ",length($string),"  Parsed length ",length(join("",@retval)),"\n" if $debug;
    print STDERR "DEBUG Input string:  |$string|\n" if (length($string)<500) && $debug;
    print STDERR "DEBUG Token parsing: |",join("+",@retval),"|\n" if (length($string)<500) && $debug ;
    @retval=();
    # slow way only do this if other m//sg method fails
    my $last = 0;    # end of the previous token
    while ( $string =~ m/$pat/osg ) {
      my $match=$&;            # copy immediately, as the substitution below changes $&
      my $pos=pos($string);    # end of the current token
      if ($last + length($match) != $pos  ) {
        # there are characters between the previous and the current token
        # $dum: the gap and the current token with up to 30 characters of context on both sides
        my $offset=30<$last ? 30 : $last;
        my $dum=substr($string,$last-$offset,$pos-$last+2*$offset);
        unless ($ignorewarnings) {
          my $dum1=$dum;
          $dum1 =~ s/\n/ /g;
          print STDERR "\n$dum1\n";
          # the marker has to be shifted by the length of the leading context actually shown
          # (which is shorter than 30 characters near the beginning of the string)
          print STDERR " " x $offset,"^" x ($pos-$last)," " x $offset,"\n";
          print STDERR "Missing characters near word " . (scalar @retval) . " character index: " . $last . "-" .  $pos . " Length: " . length($match) . " Match: |$match| (expected match marked above).\n";
        }
        # put in missing characters `by hand'
        push (@retval, substr($dum,$offset,$pos-$last-length($match)));
#       Note: there seems to be a bug in substr with utf8 that made the following line output substr which were too long,
#             using dum instead appears to work
#	push (@retval, substr($string,$last, pos($string)-$last-length($match)));
      }
      push (@retval, $match);
      $last=$pos;
    }
    # characters after the last token are not seen by the loop above: add them as final token
    if ($last < length($string)) {
      my $rest=substr($string,$last);
      print STDERR "Missing characters after word " . (scalar @retval) . " character index: " . $last . "-" . length($string) . " (no token matches: |$rest|).\n" unless $ignorewarnings;
      push (@retval, $rest);
    }

  }

  unshift(@retval,$leadin) if (length($leadin)>0);
  return @retval;
}
2364
2365
# pass1( \@seq1,\@seq2)
# Look for differences between seq1 and seq2.
# Where a common-subsequence block is flanked by deleted or appended blocks,
# and is shorter than $MINWORDSBLOCK words it is appended
# to the last deleted or appended word.  If the block contains tokens other than words
# or punctuation it is not merged (safe commands whose arguments consist of fewer than
# 3 words are treated like words in this test).
# Deleted or appended block consisting of words and safe commands only are
# also merged, to prevent break-up in pass2 (after previous isolated words have been removed)
# If there are commands with textual arguments (e.g. \caption) both in corresponding
# appended and deleted blocks split them such that the command and opening bracket
# are one token, then the rest is split up following standard rules, and the closing
# bracket is a separate token, ie. turn
# "\caption{This is a textual argument}" into
# ("\caption{","This ","is ","a ","textual ","argument","}")
# No return value.  Destructively changes sequences
#
# The routine works in two steps:
#  1. traverse_sequences calls the callbacks $match, $discard and $add defined below. They
#     do not change the sequences but only record instructions in @$todo
#  2. the instructions in @$todo are carried out with splice
# With $verbose set, the number of tokens and blocks is printed to STDERR at the end.
sub pass1 {
  my $seq1 = shift ;
  my $seq2 = shift ;

  # original lengths, needed to address elements relative to the end of the sequences (see step 2)
  my $len1 = scalar @$seq1;
  my $len2 = scalar @$seq2;
  # pattern of an ordinary word: only letters and basic punctuation, optionally followed by white space or ~
  my $wpat=qr/^(?:[a-zA-Z.,'`:;?()!]*)[\s~]*$/;   #'

  # $last1,$last2: index of the last discarded (seq1) or added (seq2) token, -1 if there is none
  # $cnt,$block:   number of tokens and tokens of the unchanged block currently collected
  my ($last1,$last2)=(-1,-1) ;
  my $cnt=0;
  my $block=[];
  # tokens of the current changed block, stored as [ token, index ] pairs
  my $addblock=[];
  my $delblock=[];
  # list of instructions [ index1, index2, cnt, tokens... ] to be executed after the traversal
  my $todo=[];
  my $instruction=[];
  my $i;
  my (@delmid,@addmid,@dummy);

  my ($addcmds,$delcmds,$matchindex);
  my ($addtextblocks,$deltextblocks);
  # counters for the statistics printed in verbose mode
  my ($addtokcnt,$deltokcnt,$mattokcnt)=(0,0,0);
  my ($addblkcnt,$delblkcnt,$matblkcnt)=(0,0,0);

  # $adddiscard is called at the beginning of $discard and $add. If an unchanged block has just
  # ended ($cnt>0), it checks whether this block has to be merged with the preceding changed
  # token and resets the variables describing the unchanged block
  my $adddiscard = sub {
                      if ($cnt > 0 ) {
			$matblkcnt++;
			# just after an unchanged block
#			print STDERR "Unchanged block $cnt, $last1,$last2 \n";
                        # the grep counts the tokens which are ordinary words or safe commands with short arguments;
                        # the block is only merged if this holds for all of its tokens
                        if ($cnt < $MINWORDSBLOCK
			    && $cnt==scalar (
				     grep { /^$wpat/ || ( /^\\((?:[`'^"~=.]|[\w\d@*]+))((?:\[$brat_n\]|\{$pat_n\})*)/o
							   && iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)
							   && scalar(@dummy=split(" ",$2))<3 ) }
					     @$block) )  {
			  # merge identical blocks shorter than $MINWORDSBLOCK
			  # and only containing ordinary words
			  # with preceding different word
			  # We cannot carry out this merging immediately as this
			  # would change the index numbers of seq1 and seq2 and confuse
			  # the algorithm, instead we store in @$todo where we have to merge
                          push(@$todo, [ $last1,$last2,$cnt,@$block ]);
			}
			$block = [];
			$cnt=0; $last1=-1; $last2=-1;
		      }
		    };
  # DISCARD_A callback: token with index $_[0] of seq1 is not present in seq2
  # (&$adddiscard; without argument list is called with the current @_, which it does not use)
  my $discard=sub { $deltokcnt++;
                    &$adddiscard; #($_[0],$_[1]);
		    push(@$delblock,[ $seq1->[$_[0]],$_[0] ]);
		    $last1=$_[0] };

  # DISCARD_B callback: token with index $_[1] of seq2 is not present in seq1
  my $add =   sub { $addtokcnt++;
                    &$adddiscard; #($_[0],$_[1]);
		    push(@$addblock,[ $seq2->[$_[1]],$_[1] ]);
		    $last2=$_[1] };

  # MATCH callback: token $_[0] of seq1 corresponds to token $_[1] of seq2
  # NOTE(review): the deleted and added blocks are only processed here, i.e. when a matching
  # token follows them. Changed blocks at the very end of the sequences do not appear to be
  # processed by pass1 -- confirm whether this is intended
  my $match = sub { $mattokcnt++;
                    if ($cnt==0) {   # first word of matching sequence after changed sequence or at beginning of word sequence
		      $deltextblocks = extracttextblocks($delblock);
		      $delblkcnt++ if scalar @$delblock;
		      $addtextblocks = extracttextblocks($addblock);
		      $addblkcnt++ if scalar @$addblock;

		      # make a list of all TEXTCMDLIST commands in deleted and added blocks
		      $delcmds = extractcommands($delblock);
      		      $addcmds = extractcommands($addblock);
		      # now find those text commands, which are found in both deleted and added blocks, and expand them
		      # keygen(third argument of _longestCommonSubsequence) implies to sort on command (0th elements of $addcmd elements)
		      # the calling format for longestCommonSubsequence has changed between versions of
		      # Algorithm::Diff so we need to check which one we are using
		      if ( $algodiffversion  > 1.15 ) {
			### Algorithm::Diff 1.19
			$matchindex=Algorithm::Diff::_longestCommonSubsequence($delcmds,$addcmds, 0, sub { $_[0]->[0] } );
		      } else {
			### Algorithm::Diff 1.15
			$matchindex=Algorithm::Diff::_longestCommonSubsequence($delcmds,$addcmds, sub { $_[0]->[0] } );
		      }

		      # $matchindex->[$i] is the index in @$addcmds of the command paired with $delcmds->[$i] (undefined if there is no partner)
		      # NOTE(review): $j is not declared in this routine, presumably it is declared at file level -- confirm
		      for ($i=0 ; $i<=$#$matchindex ; $i++) {
			if (defined($matchindex->[$i])){
			  $j=$matchindex->[$i];
			  # split the textual argument of both commands into tokens
			  @delmid=splitlatex($delcmds->[$i][3]);
			  @addmid=splitlatex($addcmds->[$j][3]);
			  # the instructions are stored in order of increasing index: first the
			  # text blocks located in front of the command, then the command itself
			  while (scalar(@$deltextblocks)  && $deltextblocks->[0][0]<$delcmds->[$i][1]) {
			    my ($index,$block,$cnt)=@{ shift(@$deltextblocks) };
			    push(@$todo, [$index,-1,$cnt,@$block]);
			  }
			  # cnt=-1: replace the command token by opening part, argument tokens and closing part
			  push(@$todo, [ $delcmds->[$i][1],-1,-1,$delcmds->[$i][2],@delmid,$delcmds->[$i][4]]);

			  while (scalar(@$addtextblocks) && $addtextblocks->[0][0]<$addcmds->[$j][1]) {
			    my ($index,$block,$cnt)=@{ shift(@$addtextblocks) };
			    push(@$todo, [-1,$index,$cnt,@$block]);
			  }
			  push(@$todo, [ -1,$addcmds->[$j][1],-1,$addcmds->[$j][2],@addmid,$addcmds->[$j][4]]);
			}
		      }
		      # mop up remaining textblocks
		      while (scalar(@$deltextblocks)) {
			my ($index,$block,$cnt)=@{ shift(@$deltextblocks) } ;
			push(@$todo, [$index,-1,$cnt,@$block]);
		      }
		      while (scalar(@$addtextblocks)) {
			my ($index,$block,$cnt)=@{ shift(@$addtextblocks) };
			push(@$todo, [-1,$index,$cnt,@$block]);
		      }

		      $addblock=[];
		      $delblock=[];
		    }
		    # collect the tokens of the unchanged block
		    push(@$block,$seq2->[$_[1]]);
		    $cnt++  };

  # key function handed to traverse_sequences: the key of a token is the token without leading and
  # trailing white space and with each internal run of white space replaced by two spaces
  my $keyfunc = sub { join("  ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );


  # now carry out the merging/splitting.  Refer to elements relative from
  # the end (with negative indices) as these offsets don't change before the instruction is executed
  # cnt>0: merged small unchanged groups with previous changed blocks
  # cnt==-1: split textual commands into components
  # (an index of -1 in an instruction means that the corresponding sequence is left alone)
  foreach $instruction ( @$todo) {
    ($last1,$last2,$cnt,@$block)=@$instruction ;
    if ($cnt>=0) {
      # replace the token at the index and the $cnt tokens following it by one joined token
      splice(@$seq1,$last1-$len1,1+$cnt,join("",$seq1->[$last1-$len1],@$block)) if $last1>=0;
      splice(@$seq2,$last2-$len2,1+$cnt,join("",$seq2->[$last2-$len2],@$block)) if $last2>=0;
    } else {
      # replace the single token at the index by the list of tokens in @$block
      splice(@$seq1,$last1-$len1,1,@$block) if $last1>=0;
      splice(@$seq2,$last2-$len2,1,@$block) if $last2>=0;
    }
  }

  if ($verbose) {
    print STDERR "\n";
    print STDERR "  $mattokcnt matching  tokens in $matblkcnt blocks.\n";
    print STDERR "  $deltokcnt discarded tokens in $delblkcnt blocks.\n";
    print STDERR "  $addtokcnt appended  tokens in $addblkcnt blocks.\n";
  }
}
2520
2521
# extracttextblocks(\@blockindex)
# $blockindex has the following format
# [ [ token1, index1 ], [token2, index2],.. ]
# where index refers to the index in the original old or new word sequence
#
# Looks for runs of consecutive tokens which are pure text, i.e. ordinary words or
# commands which are safe commands (SAFECMDLIST) but not text commands (TEXTCMDLIST).
# Returns: reference to an array with one entry for every run of at least two tokens
# [[ $index, $textblock, $cnt ], ..
# where $index index of the first word of the run (the word the others are to be merged with)
#       $textblock reference to array of the remaining words of the run (does not contain the first word)
#       $cnt   number of words in $textblock
#
# requires: iscmd
#
sub extracttextblocks {
  my $block=shift;
  my $wpat=qr/^(?:[a-zA-Z.,'`:;?()!]*)[\s~]*$/;  #'

  # we redefine locally $extraspace (shadowing the global definition) to capture command sequences with intervening spaces no matter what the global setting
  # this is done so we can capture those commands with a predefined number of arguments without having to introduce them again explicitly here
  my $extraspace='\s*';

  my @found=();      # completed runs
  my @pending=();    # words of the current run except the first one
  my $anchor=-1;     # index of first word of the current run, -1 if not inside a run

  foreach my $entry (@$block) {
    my ($token,$index)=@$entry;

    # a token is text if it is an ordinary word ...
    my $istext = ($token =~ /$wpat/);
    # ... or a command which can be treated as text
    if ( !$istext && $token =~/^\\((?:[`'^"~=.]|[\w\d@\*]+))((?:${extraspace}\[$brat_n\]${extraspace}|${extraspace}\{$pat_n\})*)/ ) {
      my $cmdname=$1;
      $istext = iscmd($cmdname,\@SAFECMDLIST,\@SAFECMDEXCL)
                && !iscmd($cmdname,\@TEXTCMDLIST,\@TEXTCMDEXCL);
    }

    if ($istext) {
      if ($anchor<0) {
        $anchor=$index;            # first word opens a new run
      } else {
        push(@pending,$token);     # further words are collected
      }
      next;
    }

    # not text: this ends the current run (runs of a single word are not recorded)
    push(@found,[ $anchor, [ @pending ], scalar(@pending) ]) if scalar(@pending);
    @pending=();
    $anchor=-1;
  }
  # the last run might extend to the end of the block
  push(@found,[ $anchor, [ @pending ], scalar(@pending) ]) if scalar(@pending);

  return(\@found);
}
2575
2576
2577
# extractcommands( \@blockindex )
# $blockindex has the following format
# [ [ token1, index1 ], [token2, index2],.. ]
# where index refers to the index in the original old or new word sequence
# Returns: reference to an array with one entry for every token which is a text command
# (TEXTCMDLIST) with at least one non-optional argument
# [ [ "cmd1", index, "\cmd1[optarg]{arg1}{", "arg2" ,"\RIGHTBRACE " ],..
# where "cmd1" is the command name without backslash,
# index is just taken from input array,
# and the closing bracket of the last argument is replaced by \RIGHTBRACE
# command must have a textual argument as last argument
#
# requires: iscmd
#
sub extractcommands {
  my $block=shift;
  my @found=();

  # we redefine locally $extraspace (shadowing the global definition) to capture command sequences with intervening spaces no matter what the global setting
  # this is done so we can capture those commands with a predefined number of arguments without having to introduce them again explicitly here
  my $extraspace='\s*';

  foreach my $entry (@$block) {
    my ($token,$index)=@$entry;
    # check if token is an alphanumeric command sequence with at least one non-optional argument
    # \cmd[...]{...}{last argument}
    # Capturing in the following results in these associations
    # $1: \cmd[...]{...}{
    # $2: cmd
    # $3: last argument
    # $4: }  + trailing spaces
    next unless $token =~ m/^(\\([\w\d\*]+)(?:${extraspace}\[$brat_n\]|${extraspace}\{$pat_n\})*${extraspace}\{)($pat_n)(\}\s*)$/so ;
    # only commands with textual argument are of interest
    next unless iscmd($2,\@TEXTCMDLIST,\@TEXTCMDEXCL);
    print STDERR "DEBUG EXTRACTCOMMANDS Match |$1|$2|$3|$4|$index \n" if $debug;
    my ($open,$cmd,$mid,$closing) = ($1,$2,$3,$4) ;
    $closing =~ s/\}/\\RIGHTBRACE/ ;
    push(@found,[ $cmd,$index,$open,$mid,$closing ]);
  }
  return \@found;
}
2618
# iscmd($cmd,\@regexarray,\@regexexcl)
# Checks whether the command name $cmd belongs to a list of commands, which is described by
# an array of patterns to be included and an array of patterns to be excluded.
# Each pattern has to match the complete command name (it is anchored with ^ and $).
# Returns 1 if $cmd matches any of the patterns in \@regexarray
# and none of the patterns in \@regexexcl, otherwise returns 0
sub iscmd {
  my ($cmd,$regexar,$regexexcl)=@_;

  # step 1: $cmd has to match at least one of the inclusion patterns
  my $listed=0;
  for my $include ( @$regexar ) {
    next unless $cmd =~ m/^${include}$/ ;
    $listed=1;
    last;
  }
  return 0 if !$listed;

  # step 2: a single matching exclusion pattern is sufficient to reject $cmd
  for my $exclude ( @$regexexcl ) {
    return 0 if ( $cmd =~ m/^${exclude}$/ );
  }
  return 1;
}
2641
2642
# @retwords=pass2( \@seq1,\@seq2)
# Look for differences between seq1 and seq2.
# Mark begin and end of deleted and appended sequences with tags $DELOPEN and $DELCLOSE
# and $ADDOPEN and $ADDCLOSE, respectively, however exclude { } & and all commands, unless
# they match an element of the whitelist (SAFECMD)
# For words in TEXTCMD but not in SAFECMD, enclose interior with $ADDOPEN and $ADDCLOSE brackets
# Deleted comment lines are marked with %DIF <
# Added comment lines are marked with %DIF >
# (the mark-up itself is generated by marktags, which is called for each block of
# consecutive deleted or appended tokens)
# Returns the list of tokens of the merged and marked-up text. Unchanged tokens are taken from seq2.
# With $verbose set, the number of matching, discarded and appended tokens and the number of
# deleted and appended blocks is printed to STDERR.
sub pass2 {
  my $seq1 = shift ;
  my $seq2 = shift ;

  my ($addtokcnt,$deltokcnt,$mattokcnt)=(0,0,0);
  my ($addblkcnt,$delblkcnt)=(0,0);

  my $retval = [];
  # deleted and appended tokens collected since the last matching token
  my $delhunk   = [];
  my $addhunk   = [];

  # $flush marks up the collected hunks (deleted text first) and appends them to the result.
  # It is used before each matching token and once more after the traversal, such that a
  # changed block at the very end of the sequences is counted in the block statistics as well
  my $flush = sub {
                    if ( scalar @$delhunk ) {
                      $delblkcnt++;
                      # mark up changes, but comment out commands
                      push @$retval,marktags($DELMARKOPEN,$DELMARKCLOSE,$DELOPEN,$DELCLOSE,$DELCMDOPEN,$DELCMDCLOSE,$DELCOMMENT,$delhunk);
                      $delhunk = [];
                    }
                    if ( scalar @$addhunk ) {
                      $addblkcnt++;
                      # we mark up changes, but simply quote commands
                      push @$retval,marktags($ADDMARKOPEN,$ADDMARKCLOSE,$ADDOPEN,$ADDCLOSE,"","",$ADDCOMMENT,$addhunk);
                      $addhunk = [];
                    }
                  };

  # DISCARD_A callback: token with index $_[0] of seq1 is not present in seq2
  my $discard = sub { $deltokcnt++;
                      push ( @$delhunk, $seq1->[$_[0]]) };

  # DISCARD_B callback: token with index $_[1] of seq2 is not present in seq1
  my $add = sub { $addtokcnt++;
                  push ( @$addhunk, $seq2->[$_[1]]) };

  # MATCH callback: write out pending changes, then copy the unchanged token
  my $match = sub { $mattokcnt++;
                    $flush->();
                    push(@$retval,$seq2->[$_[1]]) };

  # key function handed to traverse_sequences: the key of a token is the token without leading and
  # trailing white space and with each internal run of white space replaced by two spaces
  my $keyfunc = sub { join("  ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );
  # clear up unprocessed hunks
  $flush->();


  if ($verbose) {
    print STDERR "\n";
    print STDERR "  $mattokcnt matching  tokens. \n";
    print STDERR "  $deltokcnt discarded tokens in $delblkcnt blocks.\n";
    print STDERR "  $addtokcnt appended  tokens in $addblkcnt blocks.\n";
  }
  return(@$retval);
}
2699
2700# marktags($openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment,\@block)
2701# returns ($openmark,$open,$block,$close,$closemark) if @block contains no commands (except white-listed ones),
2702# braces, ampersands, or comments
2703# mark comments with $comment
2704# exclude all other exceptions from scope of open, close like this
2705# ($openmark, $open,...,$close, $opencmd,command, command,$closecmd, $open, ..., $close, $closemark)
2706# If $opencmd begins with "%" marktags assumes it is operating on a deleted block, otherwise on an added block
2707sub marktags {
2708  my ($openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment,$block)=@_;
2709  my $word;
2710  my (@argtext);
2711  my $retval=[];
2712  my $noncomment=0;
2713  my $cmd=-1;    # -1 at beginning 0: last token written is a ordinary word
2714                 # 1: last token written is a command
2715                # for keeping track whether we are just in a command sequence or in a word sequence
2716  my $cmdcomment= ($opencmd =~ m/^%/);  # Flag to indicate whether opencmd is a comment (i.e. if we intend to simply comment out changed commands)
2717  my ($command,$commandword,$closingbracket) ; # temporary variables needed below to remember sub-pattern matches
2718
2719
2720
2721# split this block to split sequences joined in pass1
2722  @$block=splitlatex(join "",@$block);
2723  ### print STDERR "DEBUG: marktags $openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment\n" if $debug;
2724  ### print STDERR "DEBUG: marktags blocksplit ",join("|",@$block),"\n" if $debug;
2725
2726  # we redefine locally $extraspace (shadowing the global definition) to capture command sequences with intervening spaces no matter what the global setting
2727  # this is done so we can capture those commands with a predefined number of arguments without having to introduce them again explicitly here
2728  my $extraspace_mt='\s*';
2729
2730
2731  foreach (@$block) {
2732    $word=$_;
2733    if ( $word =~ s/^%/%$comment/ ) {
2734      # a comment
2735      if ($cmd==1) {
2736	push (@$retval,$closecmd) ;
2737	$cmd=-1;
2738      }
2739      push (@$retval,$word);
2740      next;
2741    }
2742    if ( $word =~ m/^\s*$/ ) {
2743      ### print STDERR "DEBUG MARKTAGS: whitespace detected |$word| cmdcom |$cmdcomment| |$opencmd|\n" if $debug;
2744      # a sequence of white-space characters - this should only ever happen for the first element of block.
2745      # in deleted block, omit, otherwise just copy it in
2746      if ( ! $cmdcomment) {   # ignore in deleted blocks
2747	push(@$retval,$word);
2748      }
2749      next;
2750    }
2751    if (! $noncomment) {
2752      push (@$retval,$openmark);
2753      $noncomment=1;
2754    }
2755    # negative lookahead pattern (?!) in second clause is put in to avoid matching \( .. \) patterns
2756    # also note that second pattern will match \\
2757    if (  $word =~ /^[&{}\[\]]/ || ( $word =~ /^\\(?!\()(\\|[`'^"~=.]|[\w*@]+)/ &&  !iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)) ) {
2758      ###    if ( $word =~ /^[&{}\[\]]/ || ( $word =~ /^\\([\w*@\\% ]+)/ && !iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)) ) {
2759      # word is a command or other significant token (not in SAFECMDLIST)
2760	## same conditions as in subroutine extractcommand:
2761	# check if token is an alphanumeric command sequence with at least one non-optional argument
2762	# \cmd[...]{...}{last argument}
2763	# Capturing in the following results in these associations
2764	# $1: \cmd[...]{...}{
2765	# $2: cmd
2766	# $3: last argument
2767	# $4: }  + trailing spaces
2768	### pre-0.3    if ( ( $token =~ m/^(\\([\w\d\*]+)(?:\[$brat0\]|\{$pat_n\})*\{)($pat_n)(\}\s*)$/so )
2769      if ( ( $word =~ m/^(\\([\w\d\*]+)(?:${extraspace_mt}\[$brat_n\]|${extraspace_mt}\{$pat_n\})*${extraspace_mt}\{)($pat_n)(\}\s*)$/so )
2770	   && (iscmd($2,\@TEXTCMDLIST,\@TEXTCMDEXCL)|| iscmd($2,\@MATHTEXTCMDLIST,\@MATHTEXTCMDEXCL))
2771           && ( !$cmdcomment || !iscmd($2,\@CONTEXT2CMDLIST, \@CONTEXT2CMDEXCL) )  ) {
2772	# Condition 1: word is a command? - if yes, $1,$2,.. will be set as above
2773        # Condition 2: word is a text command - we mark up the interior of the word. There is a separate check for MATHTEXTCMDLIST
2774        #              because for $mathmarkup=WHOLE, the commands should not be split in pass1 (ie. math mode commands are not in
2775        #              TEXTCMDLIST, but the interior of MATHTEXT commnds should be highlighted in both deleted and added blocks
2776        # Condition 3: But if we are in a deleted block ($cmdcomment=1) and
2777        #            $2 (the command) is in context2, just treat it as an ordinary command (i.e. comment it open with $opencmd)
2778        # Because we do not want to disable this command
2779	# here we do not use $opencmd and $closecmd($opencmd is empty)
2780	if ($cmd==1) {
2781	  push (@$retval,$closecmd) ;
2782	} elsif ($cmd==0) {
2783	  push (@$retval,$close) ;
2784	}
2785        $command=$1; $commandword=$2; $closingbracket=$4;
2786	@argtext=splitlatex($3);   # split textual argument into tokens
2787	# and mark it up (but we do not need openmark and closemark)
2788        # insert command with initial arguments, marked-up final argument, and closing bracket
2789	if ( $cmdcomment && iscmd($commandword,\@CONTEXT1CMDLIST, \@CONTEXT1CMDEXCL) ) {
2790	  # context1cmd in a deleted environment; delete command itself but keep last argument, marked up
2791	  push (@$retval,$opencmd);
2792	  $command =~ s/\n/\n${opencmd}/sg ; # repeat opencmd at the beginning of each line
2793	  # argument, note that the additional comment character is included
2794          # to suppress linebreak after opening parentheses, which is important
2795          # for latexrevise
2796          push (@$retval,$command,"%\n{$AUXCMD\n",marktags("","",$open,$close,$opencmd,$closecmd,$comment,\@argtext),$closingbracket);
2797        } elsif ( iscmd($commandword,,\@MATHTEXTCMDLIST, \@MATHTEXTCMDEXCL) ) {
2798	  # MATHBLOCK pseudo command: consider all commands safe, except & and \\
2799	  # Keep these commands even in deleted blocks, hence set $opencmd and $closecmd (5th and 6th argument of marktags) to
2800	  # ""
2801	  local @SAFECMDLIST=(".*");
2802	  local @SAFECMDEXCL=('\\','\\\\',@UNSAFEMATHCMD);
2803	  push(@$retval,$command,marktags("","",$open,$close,"","",$comment,\@argtext)#@argtext
2804                       ,$closingbracket);
2805        } else {
2806	  # normal textcmd or context1cmd in an added block
2807	  push (@$retval,$command,marktags("","",$open,$close,$opencmd,$closecmd,$comment,\@argtext),$closingbracket);
2808	}
2809	push (@$retval,$AUXCMD,"\n") if $cmdcomment ;
2810	$cmd=-1 ;
2811      } elsif ( $cmdcomment &&
2812	       ( $word =~ m/^(\\([\w\d\*]+)(?:${extraspace_mt}\[$brat_n\]|${extraspace_mt}\{$pat_n\})*${extraspace_mt}\{)($pat_n)(\}\s*)/so )
2813	       && iscmd($2,\@KEEPCMDLIST, \@KEEPCMDEXCL) ) {
2814	# 'keepcmd' in a deleted environment: keep  the command as is
2815	push (@$retval,$close) if $cmd==0 ;
2816	push (@$retval,$word);
2817	$cmd=-1;  # pretend we are at the beginning of a sequence because we do not want to add an additional $closecmd or $close before the next token, no matter what it is
2818      } else {
2819	# ordinary command
2820	push (@$retval,$opencmd) if $cmd==-1 ;
2821	push (@$retval,$close,$opencmd) if $cmd==0 ;
2822	$word =~ s/\n/\n${opencmd}/sg if $cmdcomment ;   # if opencmd is a comment, repeat this at the beginning of every line
2823        ### print STDERR "MARKTAGS: Add command |$word|\n";
2824	push (@$retval,$word);
2825	$cmd=1;
2826      }
2827    } else {
2828      ###print STDERR "DEBUG MARKTAGS is an ordinary word or SAFECMD command \n" if $debug;
2829      # just an ordinary word or command in SAFECMD
2830      push (@$retval,$open) if $cmd==-1 ;
2831      push (@$retval,$closecmd,$open) if $cmd==1 ;
2832      ###TODO:  check here if it is a command in MBOXCMD list, and surround it with \mbox{...}
2833      ### $word =~ /^\\(?!\()(\\|[`'^"~=.]|[\w*@]+)/ &&  iscmd($1,\@MBOXCMDLIST,\@MBOXCMDEXCL))
2834      ### but actually this check has been carried out already so can simply check if word begins with backslash
2835      if ( $word =~ /^\\(?!\()(\\|[`'^"~=.]|[\w*@]+)(.*?)(\s*)$/s &&  iscmd($1,\@MBOXCMDLIST,\@MBOXCMDEXCL)) {
2836	# $word is a safe command in MBOXCMDLIST
2837	###print STDERR "DEBUG Mboxsafecmd detected:$word:\n" if $debug ;
2838	push(@$retval,"\\mbox{$AUXCMD\n\\" . $1 . $2 . $3 ."}\\hspace{0pt}$AUXCMD\n" );
2839      } else {
2840	# $word is a normal word or a safe command (not in MBOXCMDLIST)
2841	push (@$retval,$word);
2842      }
2843      $cmd=0;
2844    }
2845  }
2846  push (@$retval,$close) if $cmd==0;
2847  push (@$retval,$closecmd) if $cmd==1;
2848
2849  push (@$retval,$closemark) if ($noncomment);
2850  return @$retval;
2851}
2852
# take_comments_and_newline_from_frac()
# Used in preprocess.  Operates on $_ (the string currently being preprocessed) and rewrites
# \frac commands so that both arguments are braced and follow the command directly.
sub take_comments_and_newline_from_frac() {
  # Common short-hand usage of \frac does not conform to the latexdiff requirements, but it can be
  # rewritten so that it does.  Note that this is a rare exception to the general rule that the
  # new tex file can be reconstructed from the diff file.

  # a digit followed by a word character: \frac12 -> \frac{1}{2}
  $_ =~ s/\\frac(\d)(\w)/\\frac\{$1\}\{$2\}/g;

  # any remaining single-digit first argument: \frac1{2b} -> \frac{1}{2b}
  $_ =~ s/\\frac(\d)/\\frac\{$1\}/g;

  # remove white space and comments in front of and between the two braced arguments
  $_ =~ s/\\frac(?:\s|%[^\n]*?)*(\{$pat_n\})(?:\s|%[^\n]*?)*(\{$pat_n\})/\\frac$1$2/g;
}
2868
2869# preprocess($string, ..)
2870# carry out the following pre-processing steps for all arguments:
2871# 1. Remove leading white-space
2872#    Change \{ to \QLEFTBRACE and \} to \QRIGHTBRACE and \& to \AMPERSAND
2873# #.   Change {,} in comments to \CLEFTBRACE, \CRIGHTBRACE
2874# 2. mark all first empty line (in block of several) with \PAR tokens
2875# 3. Convert all '\%' into '\PERCENTAGE ' and all '\$' into \DOLLAR to make parsing regular expressions easier
2876# 4. Convert all \verb|some verbatim text| commands (where | can be an arbitrary character)
2877#    into \verb{hash}  (also lstinline)
2878# 5. Convert \begin{verbatim} some verbatim text \end{verbatim} into \verbatim{hash}  (not only verbatim, all patterns matching VERBATIMENV)
# 6. Convert _n into \SUBSCRIPTNB{n} and _{nnn} into \SUBSCRIPT{nnn}
# 7. Convert ^n into \SUPERSCRIPTNB{n} and ^{nnn} into \SUPERSCRIPT{nnn}
2881# 8. a. Convert $$ $$ into \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR}
2882#    b. Convert \[ \] into \begin{SQUAREBRACKET} \end{SQUAREBRACKET}
# 9. Convert all picture environments (\begin{PICTUREENV} .. \end{PICTUREENV}) into \PICTUREBLOCKenv{...} commands
2884#     For math-mode COARSE,WHOLE or NONE option -convert all \begin{MATH} .. \end{MATH}
2885#    into \MATHBLOCKmath{...} commands, where MATH/math is any valid math environment
2886
2887# 10. Add final token STOP to the very end.  This is put in because the algorithm works better if the last token is identical.  This is removed again in postprocessing.
2888#
# NB: steps 6 and 7 are likely to convert some "_" inappropriately, e.g. in file
#     names or labels, but this does not matter because they are converted back in the postprocessing step
2891# Returns: leading white space removed in step 1
sub preprocess {
  # Every argument is rewritten in place: inside the loop $_ is an alias for the caller's string.
  # NOTE(review): the comment block preceding this sub mentions removing and returning leading white
  # space; the code below neither strips it nor contains an explicit return - confirm against callers.
  for (@_) {


    # change in \verb and similar commands - note that I introduce an extra space here so that the
    #       already hashed variants do not trigger again
    # transform \lstinline{...}
#    s/\\lstinline(\[$brat0\])?(\{(?:.*?)\})/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg;
#    s/\\lstinline(\[$brat0\])?((\S).*?\2)/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg;
    # \lstinline[opt]{...}: the braced argument is stored in %verbhash and replaced by its hash value
    s/\\lstinline((?:\[$brat_n\])?)(\{(?:.*?)\})/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg;
    # \lstinline[opt]|...| where | is any character which is neither white space nor a word character
    s/\\lstinline((?:\[$brat_n\])?)(([^\s\w]).*?\3)/"\\DIFlstinline". $1 ."{". tohash(\%verbhash,"$2") ."}"/esg;
    # \verb|...|, \verb*|...| and \lstinline|...|: the delimiters are stored together with the text
    s/\\(verb\*?|lstinline)([^\s\w])(.*?)\2/"\\DIF${1}{". tohash(\%verbhash,"${2}${3}${2}") ."}"/esg;

    #    Change \{ to \QLEFTBRACE, \} to \QRIGHTBRACE, and \& to \AMPERSAND
    s/(?<!\\)\\\{/\\QLEFTBRACE /sg;
    s/(?<!\\)\\\}/\\QRIGHTBRACE /sg;
    s/(?<!\\)\\&/\\AMPERSAND /sg;
# replace {,} in comments with \\CLEFTBRACE,\\CRIGHTBRACE
    # (each pass replaces one brace per comment line, hence the repetition until nothing changes)
    1 while s/((?<!\\)%.*)\{(.*)$/$1\\CLEFTBRACE $2/mg ;
    1 while s/((?<!\\)%.*)\}(.*)$/$1\\CRIGHTBRACE $2/mg ;
    s/(?<!\\)\\%/\\PERCENTAGE /g ;  # (?<! is negative lookbehind assertion to prevent \\% from being converted
    s/(?<!\\)\\\$/\\DOLLAR /g ;  # (?<! is negative lookbehind assertion to prevent \\$ from being converted
    # verbatim environments: the whole content is stored in %verbhash and replaced by \env{hash}
    s/\\begin\{($VERBATIMENV)\}(.*?)\\end\{\1\}/"\\${1}{". tohash(\%verbhash,"${2}") . "}"/esg;
    # line-wise verbatim environments: the environment is kept, every content line is prefixed by linecomment()
    s/\\begin\{($VERBATIMLINEENV)\}(.*?)\\end\{\1\}/"\\begin{$1}". linecomment($2) . "\\end{$1}"/esg;

    # mark all first empty line (in block of several) with \PAR tokens
    s/\n(\s*?)\n((?:\s*\n)*)/\n$1\\PAR\n$2/g ;
    # In the patterns for the braced forms below, literal braces are escaped. perldiag recommends
    # escaping every literal left brace in a pattern, and perl versions differ in which unescaped
    # uses they still accept; the escaped form matches exactly the same text.
    # Convert _n or _\cmd into \SUBSCRIPTNB{n} or \SUBSCRIPTNB{\cmd} and _{nnn} into \SUBSCRIPT{nn}
    1 while s/(?<!\\)_(\s*([^{\\\s]|\\\w+))/\\SUBSCRIPTNB{$1}/g ;
    1 while s/(?<!\\)_(\s*\{($pat_n)\})/\\SUBSCRIPT$1/g ;
    # Convert ^n into \SUPERSCRIPTNB{n} and ^{nnn} into \SUPERSCRIPT{nn}
    1 while s/(?<!\\)\^(\s*([^{\\\s]|\\\w+))/\\SUPERSCRIPTNB{$1}/g ;
    1 while s/(?<!\\)\^(\s*\{($pat_n)\})/\\SUPERSCRIPT$1/g ;
    # Convert  \sqrt{n} into \SQRT{n}  and  \sqrt nn into SQRTNB{nn}
    1 while s/(?<!\\)\\sqrt(\s*([^{\\\s]|\\\w+))/\\SQRTNB{$1}/g ;
    1 while s/(?<!\\)\\sqrt(\s*\{($pat_n)\})/\\SQRT$1/g ;
    # Convert $$ $$ into \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR}
    s/\$\$(.*?)\$\$/\\begin{DOLLARDOLLAR}$1\\end{DOLLARDOLLAR}/sg;
    # Convert \[ \] into \begin{SQUAREBRACKET} \end{SQUAREBRACKET}
    s/(?<!\\)\\\[/\\begin{SQUAREBRACKET}/sg;
    s/\\\]/\\end{SQUAREBRACKET}/sg;
    # Convert all picture environments (\begin{PICTUREENV} .. \end{PICTUREENV}) into \PICTUREBLOCKenv{..}
    s/\\begin\{($PICTUREENV)}(.*?)\\end\{\1}/\\PICTUREBLOCK$1\{$2\}/sg;
    #    For math-mode COARSE,WHOLE or NONE option -convert all \begin{MATH} .. \end{MATH}
    #    into \MATHBLOCKMATH{...} commands, where MATH is any valid math environment
    #    Also convert all array environments into ARRAYBLOCK environments

    if ( $mathmarkup != FINE ) {
      # DIFANCHORARRB and DIFANCHORARRE, DIFANCHORMATHB and DIFANCHORMATHE markers are inserted here to encourage the matching algorithm
      # to always match up the closing brace. Otherwise sometimes one ends up with a situation where
      # the closing brace is deleted and added at another point. The deleted closing brace is then
      # prevented by a %DIFDELCMD, leading to material leaking in or out of the math environment.
      # The anchors are removed in post-processing again. (note that they are simple text to cause least amount of complications
      # Admittedly, this is something of a hack and will not always work. If it does not, then one needs to
      # resort to WHOLE or FINE, or NONE math mode processing.
      s/\\begin\{($ARRENV)}(.*?)\\end\{\1}/\\ARRAYBLOCK$1\{$2\\DIFANCHORARRB \}\\DIFANCHORARRE /sg;

      # normalise \frac usage in $_ before the math environments are turned into MATHBLOCK commands
      take_comments_and_newline_from_frac();

      s/\\begin\{($MATHENV|$MATHARRENV|SQUAREBRACKET)\}(.*?)\\end\{\1\}/\\MATHBLOCK$1\{$2\\DIFANCHORMATHB \}\\DIFANCHORMATHE /sg;
    }

    # add final token " STOP"
    $_ .= " STOP"
  }
}
2958
2959
# $expanded=linecomment($string)
# Preface all lines of $string with the verbatim marker "%$VERBCOMMENT" (usually DIFVRB) and
# return the result, terminated by a newline.
sub linecomment {
  my ($string)=@_;
  my $marker="%$VERBCOMMENT";
  my @lines=split("\n",$string);
  # The first line needs special treatment: an optional argument [...] at its start is retained as is,
  # and only the remainder of that line (if it contains anything but white space) is given the marker
  ### print STDERR "DEBUG: before lines[0] = ",$lines[0],"\n";
  $lines[0]=~s/^((?:\s*\[$brat_n\])?\s*)([^\s\[].*)/ defined($2) ? ( "$1\%$VERBCOMMENT$2" ) : ( $1 )/e;
  ### print STDERR "DEBUG: after  lines[0] = ",$lines[0],"\n";
  # all following lines obtain the marker through the separator used for joining
  return join("\n".$marker,@lines)."\n";
}
2970
# $simple=reverselinecomment($env, $string)
# Remove the DIFVRB comments from $string but leave changed lines marked, and wrap the result
# in \begin{$env} ... \end{$env} again.
sub reverselinecomment {
  my ($environment, $body)=@_;
  ###print STDERR "OLD VERBATIMTEXT: |$body|\n";

  # Strip markup added by latexdiff.  This should occur only if the type of verbatim environment
  # was changed.  Some information from the old file is destroyed here; it could be saved by moving
  # it out of the verbatim environment, but that would need more bookkeeping and is probably not necessary.
  $body =~ s/\\DIFaddbegin //g;
  $body =~ s/\\DIFaddend //g;
  $body =~ s/\\DIFdelbegin //g;
  $body =~ s/\\DIFdelend //g;
  $body =~ s/$DELCMDOPEN.*//g;

  # remove DIFVRB mark
  $body =~ s/%$VERBCOMMENT//g;

  # Any marker still present belongs to a changed line.  If there is none, nothing was changed
  # and the plain environment is returned.
  unless ( $body =~ s/$VERBCOMMENT//g ) {
    ###print STDERR "NEW VERBATIMTEXT: |\\begin{${environment}}${body}\\end{${environment}}|\n";
    return "\\begin{${environment}}${body}\\end{${environment}}";
  }

  # At least one line was changed, so the style has to be extended.
  # Add the ~alsolanguage~ modifier to an existing optional argument; this also deletes the rest of the
  # line after the optional argument, as lstlisting sometimes gets very confused by what is there
  # (and at other times seems to ignore it anyway)
  unless ( $body =~ s/^(\s*)\[($brat_n)\](.*)\n/$1\[$2,alsolanguage=DIFcode\]\n/ ) {
    # no optional argument yet: prepend one, adding a newline unless the first line is blank already
    my $option="[alsolanguage=DIFcode]";
    $option .= "\n" unless $body =~ m/^\s*\n/;
    $body = $option . $body;
  }
  # There is a bug in listings package (at least v1.5b) for empty comments where the actual comment command
  # is not made invisible, so an artificial '-' character is appended to empty added or deleted lines
  $body =~ s/($DELCOMMENT\s*)$/$1-/mg;
  ###print STDERR "NEW VERBATIMTEXT: |\\DIFmodbegin ... \\DIFmodend|\n";
  return "\\DIFmodbegin\n\\begin{${environment}}${body}\\end{${environment}}\n\\DIFmodend";
}
3013
3014
#hashstring=tohash(\%hash,$string)
# creates a hash value based on string and stores in %hash
#   \%hash   reference to the table mapping hash values to the original strings
#   $string  text to be stored
# Returns the hash value (an integer in string form) under which $string is stored.
# Calling tohash again with the same string and the same table yields the same value.
sub tohash {
  my ($hash,$string)=@_;
  my ($sum,$i)=(0,1);
  my ($hstr);
  my $retry=0;

  # signed char values of the string; the hash value is their position-weighted sum
  my @arr=unpack('c*',$string);

  while (1) {
    my $before=$sum;
    foreach my $val (@arr) {
      $sum += $i*$val;
      $i++;
    }
    # A further pass is only made after a collision, and it has to yield a different value.
    # If the pass did not change the sum (e.g. for an empty string), step on by one, as
    # otherwise the same occupied slot would be probed forever.
    $sum++ if ( $retry && $sum == $before );
    $hstr= "$sum";
    last unless (defined($hash->{$hstr}) && $string ne $hash->{$hstr});
    # else found a duplicate HASH need to repeat for a higher hash value
    $retry=1;
  }
  $hash->{$hstr}=$string;
  ###  print STDERR "Hash:$hstr: Content:$string:\n";
  return($hstr);
}
3038
#string=fromhash(\%hash,$fromstring)
# looks up the string stored in %hash under the hash value $fromstring and returns it
#string=fromhash(\%hash,$fromstring,$prependstring)
# as above, but every line of the returned string additionally begins with $prependstring
# (the entry in %hash itself is left unchanged)
sub fromhash {
  my ($table,$key,@extra)=@_;
  my $text=$table->{$key};
  # without a third argument the stored string is returned unmodified
  return $text unless @extra;
  $text =~ s/^/$extra[0]/mg;
  return $text;
}
3051
# writedebugfile(string, label)
# if $debug set writes <string> to file latexdiff.debug.<label>
# otherwise do nothing
# Debug output is best effort: if the file cannot be opened or closed, a warning is
# printed and processing continues.
sub writedebugfile {
  my ($string,$label)=@_;
  if ( $debug ) {
    my $filename="latexdiff.debug." . $label;
    my $fh;
    # lexical handle and checked open, so that a failure is reported instead of
    # silently printing to an unopened handle
    if ( open($fh,">",$filename) ) {
      print $fh $string;
      close($fh) or warn "WARNING: Could not close debug file $filename: $!\n";
    } else {
      warn "WARNING: Could not open debug file $filename for writing: $!\n";
    }
  }
}
3063
3064
3065# postprocess($string, ..)
3066# carry out the following post-processing steps for all arguments:
3067# * Remove STOP token from the end
3068# * Replace \RIGHTBRACE by }
3069# *  change citation commands within comments to protect from processing (using marker CITEDIF)
3070# 1. Check all deleted blocks:
3071#    a.where a deleted block contains a matching \begin and
3072#      \end environment (these will be disabled by a %DIFDELCMD statements), for selected environments enable
3073#      these commands again (such that for example displayed math in a deleted equation
3074#      is properly within math mode.  For math mode environments replace numbered equation
3075#      environments with their display only variety (so that equation numbers in new file and
3076#      diff file are identical).  Where the correct type of math environment cannot be determined
3077#      use a place holder MATHMODE
3078#    b.where one of the commands matching $COUNTERCMD is used as a DIFAUXCMD, add a statement
3079#      subtracting one from the respective counter to keep numbering consistent with new file
3080#    Replace all MATHMODE environment commands by the correct environment to achieve matching
3081#    pairs
3082#    c. Convert MATHBLOCKmath commands to their uncounted numbers (e.g. convert equation -> displaymath
3083#       (environments defined in $MATHENV will be replaced by $MATHREPL, and  environments in $MATHARRENV
3084#       will be replaced by $MATHARRREPL
3085#    d. If in-line math mode contains array environment, enclose the whole environment in \mbox'es
3086#    d. place \cite commands in mbox'es (for UNDERLINE style)
3087#
3088#   For added blocks:
3089#    c. If in-line math mode contains array environment, enclose the whole environment in \mbox'es
3090#    d. place \cite commands in mbox'es (for UNDERLINE style)
3091#
3092# 2.   If math-mode COARSE,WHOLE or NONE option set: Convert \MATHBLOCKmath{..} commands back to environments
3093#
3094#      Convert all PICTUREblock{..} commands back to the appropriate environments
3095# 3. Convert DIFadd, DIFdel, DIFaddbegin , ... into FL varieties
3096#    within floats (currently recognised float environments: plate,table,figure
3097#    plus starred varieties).
3098# 4. Remove empty %DIFDELCMD < lines
3099# 4. Convert \begin{SQUAREBRACKET} \end{SQUAREBRACKET} into \[ \]
3100#    Convert \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} into $$ $$
# 5. Convert  \SUPERSCRIPTNB{n} into ^n  and  \SUPERSCRIPT{nnn} into ^{nnn}
# 6. Convert  \SUBSCRIPTNB{n} into _n  and  \SUBSCRIPT{nnn} into _{nnn}
3103# 7. Expand hashes of verb and verbatim environments
3104# 8. Convert '\PERCENTAGE ' back into '\%' and '\DOLLAR ' into '\$'
# 9. Remove all \PAR tokens
3106# 10.  package specific processing:  endfloat: make sure \begin{figure} and \end{figure} are always
3107#      on a line by themselves, similarly for table environment
3108#  4, undo renaming of the \begin, \end,{,}  in comments
3109#    Change \QLEFTBRACE, \QRIGHTBRACE,\AMPERSAND to \{,\},\&
3110#
3111# Note have to manually synchronize substitution commands below and
3112# DIF.. command names in the header
3113sub postprocess {
3114  my ($begin,$len,$cnt,$float,$delblock,$addblock);
3115  # second level blocks
3116  my ($begin2,$cnt2,$len2,$eqarrayblock,$mathblock);
3117
3118  my (@textparts,@newtextparts,@liststack,$listtype,$listlast);
3119
3120  my (@itemargs, $itemarg);
3121
3122
3123  for (@_) {
3124    # change $'s in comments to something harmless
3125    1 while s/(%.*)\$/$1DOLLARDIF/mg ;
3126
3127    # Remove final STOP token
3128    s/ STOP$//;
3129    # Replace \RIGHTBRACE in comments by \MBLOCKRIGHTBRACE
3130    # the only way to get these is as %DIFDELCMD < \RIGHTBRACE construction
3131    # This essentially marks closing right braces of MATHBLOCK environments, which did not get matched
3132    # up. This case should be rare, so I just leave this in the diff file output. Not really elegant
3133    # but can still be dealt with later if it results in problems.
3134    s/%DIFDELCMD < \\RIGHTBRACE/%DIFDELCMD < \\MBLOCKRIGHTBRACE/g ;
3135    # Replace \RIGHTBRACE by }
3136    s/\\RIGHTBRACE/}/g;
3137
3138    # Check all deleted blocks: where a deleted block contains a matching \begin and
3139    #    \end environment (these will be disabled by a %DIFDELCMD statements), enable
3140    #    these commands again (such that for example displayed math in a deleted equation
3141    #    is properly within math mode).  For math mode environments replace numbered equation
3142    #    environments with their display only variety (so that equation numbers in new file and
3143    #    diff file are identical)
3144
3145    while ( m/\\DIFdelbegin.*?\\DIFdelend/sg ) {
3146      ###    while ( m/\\DIFdelbegin.*?\\DIFdelend/sg ) {
3147      ###      print STDERR "DEBUG Match delblock \n||||$&||||\n at ",pos,"\n";
3148      $cnt=0;
3149      $len=length($&);
3150      $begin=pos($_) - $len;
3151      $delblock=$&;
3152      ###   A much simpler method for math replacement might follow this strategy (can recycle part of the commands below for following
3153      ###   this strategy:
3154      ###   1. a Insert aux commands \begin{MATHMODE} or \end{MATHMODE} for all deleted commands opening or closing displayed math mode
3155      ###      b Insert aux commands \begin{MATHARRMODE} or \end{MATHARRMODE} for all deleted commands opening or closing math array mode
3156      ###   2  Replace MATHMODE and MATHARRMODE by correct pairing if appropriate partner  math command is found in text
3157      ###   3  a Replace remaining \begin{MATHMODE}...\end{MATHMODE} pairs with \begin{$MATHREPL}..\end{$MATHREPL}
3158      ###      b Replace remaining \begin{MATHARRMODE}...\end{MATHARRMODE} pairs with \begin{$MATHREPL}..\end{$MATHREPL}
3159      ###   4  Delete all aux command math mode pairs which have simply comments or empty lines between them
3160      ###   As written this won't actually work!
3161
3162
3163      ###   Most general case: allow all included environments
3164      ###      $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{(\w*\*?)\}\s*?\n)(.*?)(\%DIFDELCMD < \s*\\end\{\2\})/$1\\begin{$2}$AUXCMD\n$3\n\\end{$2}$AUXCMD\n$4/sg;
3165      ### (.*?[^\n]?)\n? construct is necessary to avoid empty lines in math mode, which result in
3166      ### an error
3167      # displayed math environments
3168      ###0.5:     $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHREPL}$AUXCMD\n$1$3\n\\end{$MATHREPL}$AUXCMD\n$4/sg;
3169      if ($mathmarkup == FINE ) {
3170	$delblock=~ s/(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}.*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHREPL}$AUXCMD\n$1$3\n\\end{$MATHREPL}$AUXCMD\n$4/sg;
3171	# also transform the opposite pair \end{displaymath} .. \begin{displaymath} but we have to be careful not to interfere with the results of the transformation in the line directly above
3172	### pre-0.42 obsolete version which did not work on eqnarray test      $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHENV)\}\s*?\n)(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/$1\\end{$MATHREPL}$AUXCMD\n$3\n\\begin{$MATHREPL}$AUXCMD\n$4/sg;
3173	###0.5:      $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end{MATHMODE}$AUXCMD\n$1$3\n\\begin{MATHMODE}$AUXCMD\n$4/sg;
3174	$delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}.*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end\{MATHMODE\}$AUXCMD\n$1$3\n\\begin\{MATHMODE\}$AUXCMD\n$4/sg;
3175
3176        # now look for unpaired %DIFDELCMD < \begin{MATHENV}; if found add \begin{$MATHREPL} and insert \end{$MATHREPL}
3177        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
3178        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\begin{$MATHREPL}$AUXCMD\n/sg ) {
3179	  $delblock =~ s/(\\DIFdelend$)/\\end{$MATHREPL}$AUXCMD\n$1/s ;
3180        }
3181        # now look for unpaired %DIFDELCMD < \end{MATHENV}; if found add \end{MATHMODE} and insert \begin{MATHMODE}
3182        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
3183        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\end\{MATHMODE\}$AUXCMD\n/sg ) {
3184	  $delblock =~ s/(\\DIFdelend$)/\\begin\{MATHMODE\}$AUXCMD\n$1/s ;
3185	}
3186
3187
3188	### pre-0.42      # same as above for special case \[.\] (latex abbreviation for displaymath)
3189        ### pre-0.42      $delblock=~ s/(\%DIFDELCMD < \s*\\\[\s*?\n())(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\\])/$1\\\[$AUXCMD\n$3\n\\\]$AUXCMD\n$4/sg;
3190        ### pre-0.42      $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\\]\s*?\n())(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\\[)/$1\\\]$AUXCMD\n$3\n\\\[$AUXCMD\n$4/sg;
3191        # equation array environment
3192        ###pre-0.3      $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?\n)(.*?)(\%DIFDELCMD < \s*\\end\{\2\})/$1\\begin{$MATHARRREPL}$AUXCMD\n$3\n\\end{$MATHARRREPL}$AUXCMD\n$4/sg;
3193        ###0.5      $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHARRREPL}$AUXCMD\n$1$3\n\\end{$MATHARRREPL}$AUXCMD\n$4/sg;
3194        $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}.*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHARRREPL}$AUXCMD\n$1$3\n\\end{$MATHARRREPL}$AUXCMD\n$4/sg;
3195        ###  pre-0.42 obsolete version which did not work on eqnarray test     $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?\n)(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/$1\\end{$MATHARRREPL}$AUXCMD\n$3\n\\begin{$MATHARRREPL}$AUXCMD\n$4/sg;
3196        $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end{MATHMODE}$AUXCMD\n$1$3\n\\begin{MATHMODE}$AUXCMD\n$4/sg;
3197
3198        # now look for unpaired %DIFDELCMD < \begin{MATHARRENV}; if found add \begin{$MATHARRREPL} and insert \end{$MATHARRREPL}
3199        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
3200        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\begin{$MATHARRREPL}$AUXCMD\n/sg ) {
3201	  $delblock =~ s/(\\DIFdelend$)/\\end{$MATHARRREPL}$AUXCMD\n$1/s ;
3202        }
3203        # now look for unpaired %DIFDELCMD < \end{MATHENV}; if found add \end{MATHMODE} and insert \begin{MATHMODE}
3204        # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
3205        if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\end{MATHMODE}$AUXCMD\n/sg ) {
3206	  $delblock =~ s/(\\DIFdelend$)/\\begin{MATHMODE}$AUXCMD\n$1/s ;
3207        }
3208
3209	# parse $delblock for deleted and reinstated eqnarray* environments - within those reinstate \\ and & commands
3210	###      while ( $delblock =~ m/\\begin{$MATHARRREPL}$AUXCMD\n.*?\n\\end{$MATHARRREPL}$AUXCMD\n/sg ) {
3211        while ( $delblock =~ m/\\begin\Q{$MATHARRREPL}$AUXCMD\E\n.*?\n\\end\Q{$MATHARRREPL}$AUXCMD\E\n/sg ) {
3212	  ###	      print STDERR "DEBUG Match eqarrayblock $& at ",pos,"\n";
3213	  $cnt2=0;
3214	  $len2=length($&);
3215	  $begin2=pos($delblock) - $len2;
3216	  $eqarrayblock=$&;
3217	  # reinstate deleted & and \\ commands
3218	  $eqarrayblock=~ s/(\%DIFDELCMD < \s*(\&|\\\\)\s*?(?:$DELCMDCLOSE|\n))/$1$2$AUXCMD\n/sg ;
3219
3220	  substr($delblock,$begin2,$len2)=$eqarrayblock;
3221	  pos($delblock) = $begin2 + length($eqarrayblock);
3222	}
3223      } elsif ( $mathmarkup == COARSE || $mathmarkup == WHOLE ) {
3224	#       Convert MATHBLOCKmath commands to their uncounted numbers (e.g. convert equation -> displaymath
3225	#       (environments defined in $MATHENV will be replaced by $MATHREPL, and  environments in $MATHARRENV
3226	#       will be replaced by $MATHARRREPL
3227	$delblock=~ s/\\MATHBLOCK($MATHENV)\{($pat_n)\}/\\MATHBLOCK$MATHREPL\{$2\}/sg;
3228	$delblock=~ s/\\MATHBLOCK($MATHARRENV)\{($pat_n)\}/\\MATHBLOCK$MATHARRREPL\{$2\}/sg;
3229      }
3230      # Reinstate completely deleted list environments. note that items within the
3231      # environment will still be commented out.  They will be restored later
3232      $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($LISTENV)\}\s*?(?:\n|$DELCMDCLOSE))(.*?)(\%DIFDELCMD < \s*\\end\{\2\})/{
3233															###   # block within the search; replacement environment
3234															###   "$1\\begin{$2}$AUXCMD\n". restore_item_commands($3). "\n\\end{$2}$AUXCMD\n$4";
3235															"$1\\begin{$2}$AUXCMD\n$3\n\\end{$2}$AUXCMD\n$4";
3236														       }/esg;
3237
3238      ###      $delblock=~ s/\\begin\{$MATHENV}$AUXCMD/\\begin{$MATHREPL}$AUXCMD/g;
3239      ###      $delblock=~ s/\\end\{$MATHENV}$AUXCMD/\\end{$MATHREPL}$AUXCMD/g;
3240      ###      $delblock=~ s/\\begin\{$MATHARRENV}$AUXCMD/\\begin{$MATHARRREPL}$AUXCMD/g;
3241      ###      $delblock=~ s/\\end\{$MATHARRENV}$AUXCMD/\\end{$MATHARRREPL}$AUXCMD/g;
3242
3243      #    b.where one of the commands matching $COUNTERCMD is used as a DIFAUXCMD, add a statement
3244      #      subtracting one from the respective counter to keep numbering consistent with new file
3245      $delblock=~ s/\\($COUNTERCMD)((?:${extraspace}\[$brat_n\]${extraspace}|${extraspace}\{$pat_n\})*\s*${AUXCMD}\n)/\\$1$2\\addtocounter{$1}{-1}${AUXCMD}\n/sg ;
3246
3247      #    bb. disable active labels within deleted blocks (i.e. those not commented out) (as these are not safe commands, this should normally only
3248      #        happen within deleted maths blocks
3249      ###      $delblock=~ s/(?<!$DELCMDOPEN)(\\$LABELCMD(?:${extraspace})\{(?:[^{}])*\}[\t ]*)\n?/${DELCMDOPEN}$1${DELCMDCLOSE}/smg ;
3250      ###      previous line caused trouble as by issue #90 I might need to modify this
3251      $delblock=~ s/^([^%]*)(\\$LABELCMD(?:${extraspace})\{(?:[^{}])*\}[\t ]*)\n?/$1${DELCMDOPEN}$2${DELCMDCLOSE}/smg ;
3252      ###      print STDERR "<<<$delblock>>>\n" if $debug;
3253
3254
3255      #     c. If in-line math mode contains array environment, enclose the whole environment in \mbox'es
3256      while ( $delblock =~ m/($math)(\s*)/sg ) {
3257	#	      print STDERR "DEBUG Delblock Match math $& at ",pos,"\n";
3258	$cnt2=0;
3259	$len2=length($&);
3260	$begin2=pos($delblock) - $len2;
3261	$mathblock="%\n\\mbox{$AUXCMD\n$1\n}$AUXCMD\n";
3262        next unless ( $mathblock =~ /ARRAYBLOCK/ or $mathblock =~ m/\{$ARRENV\}/ );
3263	substr($delblock,$begin2,$len2)=$mathblock;
3264	pos($delblock) = $begin2 + length($mathblock);
3265      }
3266      ###      if ($CITE2CMD) {
3267      ######   ${extraspace}(?:\[$brat0\]${extraspace}){0,2}\{$pat_n\}))  .*?%%%\n
3268      ###	$delblock=~s/($DELCMDOPEN\s*\\($CITE2CMD)(.*)$DELCMDCLOSE)/
3269      ###	  # Replacement code
3270      ###	  {my ($aux,$all);
3271      ###	   $aux=$all=$1;
3272      ###	   $aux=~s#\n?($DELCMDOPEN|$DELCMDCLOSE)##g;
3273      ###	   $all."$aux$AUXCMD\n";}/sge;
3274      ###      }
3275      ###      # or protect \cite commands with \mbox
3276      ###      if ($CITECMD) {
3277      ######	$delblock=~s/(\\($CITECMD)${extraspace}(?:\[$brat0\]${extraspace}){0,2}\{$pat_n\})(\s*)/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/msg ;
3278      ###	$delblock=~s/(\\($CITECMD)${extraspace}(?:<$abrat0>${extraspace})?(?:\[$brat0\]${extraspace}){0,2}\{$pat_n\})(\s*)/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/msg ;
3279      ###      }
3280      # if MBOXINLINEMATH is set, protect inlined math environments with an extra mbox
3281      if ( $MBOXINLINEMATH ) {
3282	# note additional \newline after command is omitted from output if right at the end of deleted block (otherwise a spurious empty line is generated)
3283	$delblock=~s/($math)(?:[\s\n]*)?/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/sg;
3284      }
3285      ###if ( defined($packages{"listings"} and $latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{color\}/))   {
3286      ###  #     change included verbatim environments
3287      ###  $delblock =~ s/\\DIFverb\{/\\DIFDIFdelverb\{/g;
3288      ###  $delblock =~ s/\\DIFlstinline/\\DIFDIFdellstinline/g;
3289      ###}
3290      # Mark deleted verbose commands
3291      $delblock =~ s/(${DELCMDOPEN}\\DIF((?:verb\*?|lstinline(?:\[$brat_n\])?)\{([-\d]*?)\}\s*).*)$/%\n\\DIFDIFdel$2${AUXCMD}\n$1/gm;
3292      if ( $CUSTOMDIFCMD ) {
3293        $delblock =~ s/(${DELCMDOPEN}.*)\\($CUSTOMDIFCMD)/$1${DELCMDCLOSE}\\DEL$2/gm;
3294      }
3295
3296      #     splice in modified delblock
3297      substr($_,$begin,$len)=$delblock;
3298      pos = $begin + length($delblock);
3299    }
3300    ###writedebugfile($_,'postprocess');
3301
3302    ### print STDERR "<<<$_>>>\n" if $debug;
3303
3304
3305    # make the array modification in added blocks
3306    while ( m/\\DIFaddbegin.*?\\DIFaddend/sg ) {
3307      $cnt=0;
3308      $len=length($&);
3309      $begin=pos($_) - $len;
3310      $addblock=$&;
3311      while ( $addblock =~ m/($math)(\s*)/sg ) {
3312	$cnt2=0;
3313	$len2=length($&);
3314	$begin2=pos($addblock) - $len2;
3315	$mathblock="%\n\\mbox{$AUXCMD\n$1\n}$AUXCMD\n";
3316        next unless ( $mathblock =~ /ARRAYBLOCK/ or $mathblock =~ m/\{$ARRENV\}/) ;
3317	substr($addblock,$begin2,$len2)=$mathblock;
3318	pos($addblock) = $begin2 + length($mathblock);
3319      }
3320      # if MBOXINLINEMATH is set, protect inlined math environments with an extra mbox
3321      if ( $MBOXINLINEMATH ) {
3322	##$addblock=~s/($math)/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/sg;
3323	$addblock=~s/($math)(?:[\s\n]*)?/\\mbox{$AUXCMD\n$1\n}$AUXCMD\n/sg;
3324      }
3325      ###if ( defined($packages{"listings"} and $latexdiffpreamble =~ /\\RequirePackage(?:\[$brat0\])?\{color\}/))   {
3326	# mark added verbatim commands
3327      $addblock =~ s/\\DIFverb/\\DIFDIFaddverb/g;
3328      $addblock =~ s/\\DIFlstinline/\\DIFDIFaddlstinline/g;
3329      if( $CUSTOMDIFCMD ) {
3330        $addblock =~ s/\\($CUSTOMDIFCMD)/\\ADD$1/g;
3331      }
3332      # markup the optional arguments of \item
3333      $addblock =~ s/(\\$ITEMCMD$extraspace(?:<$abrat0>)?$extraspace)\[($brat_n)\]/
3334	@itemargs=splitlatex(substr($2,0,length($2)));
3335        $itemarg="[".join("",marktags("","",$ADDOPEN,$ADDCLOSE,"","",$ADDCOMMENT,\@itemargs))."]";
3336      "$1$itemarg"/sge;   # old substitution: $1\[$ADDOPEN$2$ADDCLOSE\]
3337      ###}
3338#     splice in modified addblock
3339      substr($_,$begin,$len)=$addblock;
3340      pos = $begin + length($addblock);
3341    }
3342
3343    # Go through whole text, and by counting list environment commands, find out when we are within a list environment.
3344    # Within those restore deleted \item commands
3345    @textparts=split /(?<!$DELCMDOPEN)(\\(?:begin|end)\{$LISTENV\})/ ;
3346    @liststack=();
3347    @newtextparts=map {
3348      ### print STDERR ":::::::: $_\n";
3349      if ( ($listtype) = m/^\\begin\{($LISTENV)\}$/ ) {
3350	print STDERR "DEBUG: postprocess \\begin{$listtype}\n" if $debug;
3351	push @liststack,$listtype;
3352      } elsif ( ($listtype) = m/^\\end\{($LISTENV)\}$/ ) {
3353 	print STDERR "DEBUG: postprocess \\end{$listtype}\n" if $debug;
3354	if (scalar  @liststack > 0) {
3355 	  $listlast=pop(@liststack);
3356 	  ($listtype eq $listlast) or warn "Invalid nesting of list environments: $listlast environment closed by \\end{$listtype}.";
3357 	} else {
3358 	  warn "WARNING: Invalid nesting of list environments: \\end{$listtype} encountered without matching \\begin{$listtype}.\n";
3359 	}
3360      } else {
3361	print STDERR "DEBUG: postprocess \@liststack=(",join(",",@liststack),")\n" if $debug;
3362	if (scalar  @liststack > 0 ) {
3363	  # we are within a list environment and should replace all item commands
3364	  $_=restore_item_commands($_);
3365	}
3366	# else: we are outside a list environment and do not need to do anything
3367      }
3368      $_ } @textparts;     # end of map command
3369    # replace the main text with the modified version
3370    $_= join("",@newtextparts);
3371
3372
3373
3374
3375    # Replace MATHMODE environments from step 1a above by the correct Math environment
3376
3377    # The next line is complicated.  The negative look-ahead insertion makes sure that no \end{$MATHENV} (or other mathematical
3378    # environments) are between the \begin{$MATHENV} and \end{MATHMODE} commands. This is necessary as the minimal matching
3379    # is not globally minimal but only 'locally' (matching is beginning from the left side of the string)
3380    if ( $mathmarkup == FINE ) {
3381      1 while s/\\begin\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)}((?:.(?!(?:\\end\{(?:(?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)}|\\begin\{MATHMODE})))*?)\\end\{MATHMODE}/\\begin{$1}$2\\end{$1}/s;
3382      1 while s/\\begin\{MATHMODE}((?:.(?!\\end\{MATHMODE}))*?)\\end\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)}/\\begin{$2}$1\\end{$2}/s;
3383      # convert remaining \begin{MATHMODE} \end{MATHMODE} (and not containing & or \\ )into MATHREPL environments
3384      s/\\begin\{MATHMODE\}((?:(.(?!(?<!\\)\&|\\\\))*)?)\\end\{MATHMODE\}/\\begin{$MATHREPL}$1\\end{$MATHREPL}/sg;
3385      # others into MATHARRREPL
3386      s/\\begin\{MATHMODE\}(.*?)\\end\{MATHMODE\}/\\begin{$MATHARRREPL}$1\\end{$MATHARRREPL}/sg;
3387
3388      # now look for AUXCMD math-mode pairs which have only comments (or empty lines between them), and remove the added commands
3389      s/\\begin\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)\}$AUXCMD\n((?:\s*%.[^\n]*\n)*)\\end\{\1\}$AUXCMD\n/$2/sg;
3390    } else {
3391      #   math modes OFF,WHOLE,COARSE: Convert \MATHBLOCKmath{..} commands back to environments
3392      s/\\MATHBLOCK($MATHENV|$MATHARRENV|SQUAREBRACKET)\{($pat_n)\}/\\begin{$1}$2\\end{$1}/sg;
3393      # convert ARRAYBLOCK.. commands back to environments
3394      s/\\ARRAYBLOCK($ARRENV)\{($pat_n)\}/\\begin{$1}$2\\end{$1}/sg;
3395      # get rid of the DIFANCHOR markers, first the delete comments, then everywhere
3396      s/%DIFDELCMD < \\DIFANCHOR(?:MATH|ARR)[BE] (?:\n%DIFDELCMD < )?%%%\n//g ;
3397      s/\\DIFANCHOR(?:MATH|ARR)[BE] //g;
3398    }
3399
3400    #  Convert all PICTUREblock{..} commands back to the appropriate environments
3401    s/\\PICTUREBLOCK($PICTUREENV)\{($pat_n)\}/\\begin{$1}$2\\end{$1}/sg;
3402#0.5:    # Remove all mark up within picture environments
3403#     while ( m/\\begin\{($PICTUREENV)\}.*?\\end\{\1\}/sg ) {
3404#       $cnt=0;
3405#       $len=length($&);
3406#       $begin=pos($_) - $len;
3407#       $float=$&;
3408#       $float =~ s/\\DIFaddbegin //g;
3409#       $float =~ s/\\DIFaddend //g;
3410#       $float =~ s/\\DIFadd\{($pat_n)\}/$1/g;
3411#       $float =~ s/\\DIFdelbegin //g;
3412#       $float =~ s/\\DIFdelend //g;
3413#       $float =~ s/\\DIFdel\{($pat_n)\}//g;
3414#       $float =~ s/$DELCMDOPEN.*//g;
3415#       substr($_,$begin,$len)=$float;
3416#       pos = $begin + length($float);
3417#     }
3418    # Convert DIFadd, DIFdel, DIFFaddbegin , ... into  varieties
3419    #    within floats (currently recognised float environments: plate,table,figure
3420    #    plus starred varieties).
3421    while ( m/\\begin\{($FLOATENV)\}.*?\\end\{\1\}/sg ) {
3422      $cnt=0;
3423      $len=length($&);
3424      $begin=pos($_) - $len;
3425      $float=$&;
3426      $float =~ s/\\DIFaddbegin /\\DIFaddbeginFL /g;
3427      $float =~ s/\\DIFaddend /\\DIFaddendFL /g;
3428      $float =~ s/\\DIFadd\{/\\DIFaddFL{/g;
3429      $float =~ s/\\DIFdelbegin /\\DIFdelbeginFL /g;
3430      $float =~ s/\\DIFdelend /\\DIFdelendFL /g;
3431      $float =~ s/\\DIFdel\{/\\DIFdelFL{/g;
3432      substr($_,$begin,$len)=$float;
3433      pos = $begin + length($float);
3434    }
3435    ### former location of undo renaming of \begin and \end in comments
3436
3437    # remove empty DIFCMD < lines
3438    s/^\Q${DELCMDOPEN}\E\n//msg;
3439
3440    # Expand hashes of verb and verbatim environments (note negative look behind assertion to not leak out of DIFDELCMD comments
3441    s/${DELCMDOPEN}\\($VERBATIMENV)\{([-\d]*?)\}/"${DELCMDOPEN}\\begin{${1}}".fromhash(\%verbhash,$2,$DELCMDOPEN)."${DELCMDOPEN}\\end{${1}}"/esg;
3442    # revert changes to verbatim environments for line diffs (and add code to mark up changes)
3443    s/(?<!$DELCMDOPEN)\\begin\{($VERBATIMLINEENV)\}(.*?)\\end\{\1\}/"". reverselinecomment($1, $2) .""/esg;
3444#    # we do the same for deleted environments but additionally reinstate the framing commands
3445#   s/$DELCMDOPEN\\begin\{($VERBATIMLINEENV)\}$extraspace(?:\[$brat0\])?$DELCMDCLOSE(.*?)$DELCMDOPEN\\end\{\1\}$DELCMDCLOSE/"\\begin{$1}". reverselinecomment($2) . "\\end{$1}"/esg;
3446##    s/$DELCMDOPEN\\begin\{($VERBATIMLINEENV)\}($extraspace(?:\[$brat0\])?\s*)(?:\n|$DELCMDOPEN)*$DELCMDCLOSE((?:\%$DELCOMMENT$VERBCOMMENT.*?\n)*)($DELCMDOPEN\\end\{\1\}(?:\n|\s|$DELCMDOPEN)*$DELCMDCLOSE)/"SUBSTITUTION: \\begin{$1}$2 INTERIOR: |$3| END: |$4|"/esg;
3447     s/ # Deleted \begin command of verbatim environment (Captures $1: whole deleted command, $2: environment, $3: optional arguments with white space
3448          (\Q$DELCMDOPEN\E\\begin\{($VERBATIMLINEENV)\}(\Q$extraspace\E(?:\[$brat_n\])?\s*)(?:\n|\Q$DELCMDOPEN\E)*\Q$DELCMDCLOSE\E)
3449        # Interior of deleted verbatim environment should consist entirely of delete DIFVRB comments, i.e. match only lines beginning with % DIF < DIFVRB
3450        #   Captures: $4: all lines combined
3451          ((?:\%\Q$DELCOMMENT$VERBCOMMENT\E[^\n]*?\n)*)
3452        # Deleted \end command of verbatim environment. Note that the type is forced to match the opening. Captures: $5: Whole deleted environment  (previous way this line was written: (\Q$DELCMDOPEN\E\\end\{\2\}(?:\n|\s|\Q$DELCMDOPEN\E)*\Q$DELCMDCLOSE\E)
3453          (\Q$DELCMDOPEN\E\\end\{\2\})
3454      / # Substitution part
3455            $1                   # Leave expression as is
3456            . "$AUXCMD NEXT\n"   # Mark the following line as an auxiliary command
3457            . ""    # reinstate the original environment without options
3458            . reverselinecomment($2, "$3$4")   # modify the body to change the markup; reverselinecomment parses for options
3459            . " $AUXCMD\n"  # close the auxiliary environment
3460            . $5               # and again leave the original deleted closing environment as is
3461      /esgx;  # Modifiers of substitution command
3462    # where changes have occurred in verbatim environment, change verbatim to DIFverbatim to allow mark-up
3463    # (I use the presence of optional paramater to verbatim environment as the marker - normal verbatim
3464    # environment does not take optional arguments)
3465    s/(?<!$DELCMDOPEN)\\begin\{(verbatim[*]?)\}(\[$brat_n\].*?)\\end\{\1\}/\\begin{DIF$1}$2\\end{DIF$1}/sg;
3466
3467    s/\\($VERBATIMENV)\{([-\d]*?)\}/"\\begin{${1}}".fromhash(\%verbhash,$2)."\\end{${1}}"/esg;
3468
3469
3470    # remove all \PAR tokens (taking care to properly keep commented out PAR's
3471    # from introducing uncommented newlines - next line)
3472    s/(%DIF < )([^\n]*?)\\PAR\n/$1$2\n$1\n/sg;
3473    # convert PAR commands which are on a line by themselves
3474    s/\n(\s*?)\\PAR\n/\n\n/sg;
3475    # convert remaining PAR commands (which are preceded by non-white space characters, usually "}" ($ADDCLOSE)
3476    s/\\PAR\n/\n\n/sg;
3477
3478    #  package specific processing:
3479    if ( defined($packages{"endfloat"})) {
3480      #endfloat: make sure \begin{figure} and \end{figure} are always
3481      #      on a line by themselves, similarly for table environment
3482      print STDERR "endfloat package detected.\n" if $verbose ;
3483      # eliminate whitespace before and after
3484      s/^(\s*)(\\(?:end|begin)\{(?:figure|table)\})(\s*?)$/$2/mg;
3485      # split lines with remaining characters before float environment conmmand
3486      s/^([^%]+)(\\(?:begin|end)\{(?:figure|table)\})/$1\n$2/mg;
3487      # split lines with remaining characters after float environment conmmand
3488      s/^((?:[^%]+)\\(?:begin|end)\{(?:figure|table)\}(?:\[[a-zA-Z]+\])?)(.+)((?:%.*)?)$/$1\n$2$3/mg;
3489    }
3490
3491    # Convert '\PERCENTAGE ' back into '\%' (the final question mark catches a special situation where due to a latter pre-processing step the ' ' becomes separated
3492    s/\\PERCENTAGE ?/\\%/g;
3493    # Convert '\DOLLAR ' back into '\$'
3494    s/\\DOLLAR /\\\$/g;
3495
3496    # undo renaming of the \begin and \end,{,}  and dollars in comments
3497
3498    # although we only renamed $ in comments to DOLLARDIFF, we might have lost the % in unchanged verbatim blocks, so rename all
3499    s/DOLLARDIF/\$/g;
3500#   Convert \begin{SQUAREBRACKET} \end{SQUAREBRACKET} into \[ \]
3501    s/\\end\{SQUAREBRACKET\}/\\\]/sg;
3502    s/\\begin\{SQUAREBRACKET\}/\\\[/sg;
3503# 4. Convert \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} into $$ $$
3504    s/\\begin\{DOLLARDOLLAR\}(.*?)\\end\{DOLLARDOLLAR\}/\$\$$1\$\$/sg;
3505# 5. Convert  \SUPERSCRIPTNB{n} into ^n  and  \SUPERSCRIPT{nn} into ^{nnn}
3506    1 while s/\\SUPERSCRIPT(\s*\{($pat_n)\})/^$1/g ;
3507    1 while s/\\SUPERSCRIPTNB\{(\s*$pat0)\}/^$1/g ;
3508    # Convert  \SUBSCRIPNB{n} into _n  and  \SUBCRIPT{nn} into _{nnn}
3509    1 while s/\\SUBSCRIPT(\s*\{($pat_n)\})/_$1/g ;
3510    1 while s/\\SUBSCRIPTNB\{(\s*$pat0)\}/_$1/g ;
3511    # Convert  \SQRT{n} into \sqrt{n}  and  \SQRTNB{nn} into \sqrt nn
3512    1 while s/\\SQRT(\s*\{($pat_n)\})/\\sqrt$1/g ;
3513    1 while s/\\SQRTNB\{(\s*$pat0)\}/\\sqrt$1/g ;
3514
3515    1 while s/(%.*)\\CRIGHTBRACE (.*)$/$1\}$2/mg ;
3516    1 while s/(%.*)\\CLEFTBRACE (.*)$/$1\{$2/mg ;
3517
3518
3519#    Change \QLEFTBRACE, \QRIGHTBRACE to \{,\}
3520    s/\\QLEFTBRACE /\\\{/sg;
3521    s/\\QRIGHTBRACE /\\\}/sg;
3522    s/\\AMPERSAND /\\&/sg;
3523    # Highligh added inline verbatim commands if possible
3524    if ( $latexdiffpreamble =~ /\\RequirePackage(?:\[$brat_n\])?\{color\}/ )   {
3525      # wrap added verb commands with color commands
3526      s/\\DIFDIFadd((?:verb\*?|lstinline(?:\[$brat_n\])?)\{[-\d]*?\}[\s\n]*)/\{\\color{blue}$AUXCMD\n\\DIF$1%\n\}$AUXCMD\n/sg;
3527      s/\\DIFDIFdel((?:verb\*?|lstinline(?:\[$brat_n\])?)\{[-\d]*?\}[\s\n]*$AUXCMD)/\{\\color{red}${AUXCMD}\n\\DIF$1\n\}${AUXCMD}/sg;
3528    } else {
3529      # currently if colour markup is not used just remove the added mark
3530      s/\\DIFDIFadd(verb\*?|lstinline)/\\DIF$1/sg;
3531      s/\\DIFDIFdel((?:verb\*?|lstinline(?:\[$brat_n\])?)\{[-\d]*?\}[\s\n]*$AUXCMD\n)//sg;
3532    }
3533    # expand \verb and friends inline arguments
3534    s/\\DIF((?:DIFadd|DIFdel)?(?:verb\*?|lstinline(?:\[$brat_n\])?))\{([-\d]*?)\}/"\\${1}". fromhash(\%verbhash,$2)/esg;
3535    # add basicstyle color{blue} to added lstinline commands
3536    # finally add the comment to the ones not having an optional argument before
3537    ###s/\\DIFaddlstinline(?!\[)/\\lstinline\n[basicstyle=\\color{blue}]$AUXCMD\n/g;
3538
3539  return;
3540  }
3541}
3542
# $out = restore_item_commands($listenviron)
# Short helper function for post-process.  Takes a piece of text and returns a copy in which every
# \item command that has been commented out as a deleted command (%DIFDELCMD < \item ...) is
# followed by a reinstated copy of that \item command, terminated by $AUXCMD and a newline.
# If the deleted \item has an optional [..] argument, that argument is tokenised with splitlatex
# and marked up as deleted material with marktags.
sub restore_item_commands {
  my ($string)=@_ ;

  # Build the replacement text for one deleted \item command.
  #   $delcmd  : the complete matched DIFDELCMD text (kept unchanged in front of the restored command)
  #   $itemcmd : the item command itself, including any trailing $extraspace
  #   $overlay : optional <..> part, including any trailing $extraspace (may be empty)
  #   $optarg  : optional argument including its square brackets (empty string if absent)
  my $reinstate = sub {
    my ($delcmd,$itemcmd,$overlay,$optarg)=@_;
    my $marked="";
    if ( length($optarg)>0 ) {
      # use substr to exclude the square brackets at the end points
      my @tokens=splitlatex(substr($optarg,1,length($optarg)-2));
      $marked="[".join("",marktags("","",$DELOPEN,$DELCLOSE,$DELCMDOPEN,$DELCMDCLOSE,$DELCOMMENT,\@tokens))."]";
    }
    return $delcmd.$itemcmd.$overlay.$marked.$AUXCMD."\n";
  };

  # Captures: $1 whole deleted command, $2 item command, $3 overlay specification, $4 optional argument,
  #           $5 further optional-argument sequences (captured, but currently not used in the replacement)
  $string =~ s/(\%DIFDELCMD < \s*(\\$ITEMCMD$extraspace)((?:<$abrat0>)?$extraspace)((?:\[$brat_n\])?)\s*((?:${cmdoptseq}\s*?)*)(?:\n|$DELCMDCLOSE))/$reinstate->($1,$2,$3,$4)/sge;
  return($string);
}
3561
3562
# @auxlines=preprocess_preamble($oldpreamble,$newpreamble);
  # Pre-process the preambles by looking for commands used in \maketitle (title, author, date etc commands);
  # the list of commands is taken from @CONTEXT2CMDLIST.
  # If found, a bodydiff is used to mark up their content, and the corresponding commands in both
  # preambles are replaced by the marked up version to 'fool' the linediff (such that only the body is
  # marked up).  Both arguments are modified in place.
  # Special cases are e.g. author commands being added (or removed):
  # 1. If commands are added, then the entire content is marked up as new, but also the lines are marked
  #    as new in the linediff
  # 2. If commands are removed, then the linediff will mark the line as deleted.  For each such command the
  #    function returns in @auxlines one line (terminated by $AUXCMD), to be appended at the end of the
  #    preamble, which shows the respective field as deleted
  # Returns the empty list, without touching either preamble, if one of the commands occurs more than once
  # in the old or in the new preamble (a warning is written to STDERR in this case).
  # Commands are processed in the order in which they appear in the respective preamble, so that the result
  # does not depend on the (randomised) ordering of perl hashes.
sub preprocess_preamble {
  my ($oldpreambleref,$newpreambleref)=(\$_[0],\$_[1]) ;
  # Remember to use $$oldpreambleref to refer to oldpreamble
  my @auxlines=();
  my ($titlecmd,$titlecmdpat);

  my $warnmsgdetail = <<EOF ;
     This should not occur for standard styles, but can occur for some specifiy styles, document classes,
     e.g. journal house styles.
     Workaround: Use --replace-context2cmd option to specifically set those commands, which are not repeated.
EOF

  # reuse context2cmdlist to define the commands to look out for in preamble
  $titlecmd = "(?:".join("|",@CONTEXT2CMDLIST).")";
  # as context2cmdlist is stored as regex, e.g. ((?-xism:^title$), we need to remove ^ and $ for
  # reuse in a more complex regex
  $titlecmd =~ s/[\$\^]//g;
  # make sure to not match on comment lines:
  # Captures per match: whole command text, command name, optional argument (undef if absent), argument
  $titlecmdpat=qr/^(?:[^%\n]|\\%)*(\\($titlecmd)$extraspace(?:\[($brat_n)\])?(?:\{($pat_n)\}))/ms;

  # ($hashref,$orderref)=$collect->(\$preamble,"old"|"new")
  # Index all title commands of one preamble: $hashref->{cmd}=[ whole command text, optarg, arg ];
  # @$orderref lists the command names in order of appearance.
  # Warns and returns the empty list if a command is used more than once.
  my $collect = sub {
    my ($preambleref,$which)=@_;
    my (%found,@order);
    my @fields= ( $$preambleref =~ m/$titlecmdpat/g );
    while ( @fields ) {
      my ($line,$cmd,$optarg,$arg)=splice(@fields,0,4);
      if ( defined($found{$cmd}) ) {
        warn "WARNING: $cmd is used twice in preamble of $which file. Reverting to pure line diff mode for preamble.\n";
        print STDERR $warnmsgdetail;
        return;
      }
      $found{$cmd}=[ $line, $optarg, $arg ];
      push @order,$cmd;
    }
    return (\%found,\@order);
  };

  # $text=$floatmarkup->($text): switch the mark-up commands to the FL varieties
  # (applied to marked-up optional arguments)
  my $floatmarkup = sub {
    my ($text)=@_;
    $text =~ s/\\DIFaddbegin /\\DIFaddbeginFL /g;
    $text =~ s/\\DIFaddend /\\DIFaddendFL /g;
    $text =~ s/\\DIFadd\{/\\DIFaddFL{/g;
    $text =~ s/\\DIFdelbegin /\\DIFdelbeginFL /g;
    $text =~ s/\\DIFdelend /\\DIFdelendFL /g;
    $text =~ s/\\DIFdel\{/\\DIFdelFL{/g;
    return $text;
  };

  # index both preambles before modifying anything, so that a duplicate leaves both untouched
  my ($oldcmds,$oldorder) = $collect->($oldpreambleref,"old") or return;
  my ($newcmds,$neworder) = $collect->($newpreambleref,"new") or return;

  # commands present in the new preamble (changed, unchanged or added)
  foreach my $cmd ( @$neworder ) {
    my ($newline,$optargnew,$argnew)=@{$newcmds->{$cmd}};
    # use exists rather than dereferencing directly to avoid autovivifying entries in the old index
    my ($oldline,$optargold,$argold)= exists($oldcmds->{$cmd}) ? @{$oldcmds->{$cmd}} : ();
    $optargnew="" unless defined($optargnew);
    $optargold="" unless defined($optargold);
    $argold=""    unless defined($argold);

    my $argdiff="{" . join("",bodydiff($argold,$argnew)) ."}";
    # Replace \RIGHTBRACE by }
    $argdiff =~ s/\\RIGHTBRACE/}/g;

    # the optional argument is only marked up if the new command has one
    my $optargdiff="";
    if ( length $optargnew ) {
      $optargdiff=$floatmarkup->("[".join("",bodydiff($optargold,$optargnew))."]");
    }

    my $replacement="\\$cmd$optargdiff$argdiff";
    # Note: \Q and \E force literal interpretation of what it between them but allow
    #      variable interpolation, such that e.g. \title matches just that and not TAB-itle
    $$newpreambleref=~s/\Q$newline\E/$replacement/s;
    # replace this in old preamble if necessary
    if ( defined($oldline) ) {
      $$oldpreambleref=~s/\Q$oldline\E/$replacement/s ;
    }
  }

  # commands which only occur in the old preamble (i.e. were removed)
  foreach my $cmd ( @$oldorder ) {
    # if this has already been dealt with above can just skip
    next if defined($newcmds->{$cmd}) ;
    my (undef,$optargold,$argold)=@{$oldcmds->{$cmd}};
    my $argdiff="{" . join("",bodydiff($argold,"")) ."}";
    my $optargdiff="";
    if ( defined($optargold) ) {
      $optargdiff=$floatmarkup->("[".join("",bodydiff($optargold,""))."]");
    }
    # add auxcmd comment at the end to highlight the added line
    push @auxlines,"\\$cmd$optargdiff$argdiff$AUXCMD";
  }
  return(@auxlines);
}
3691
3692
3693
# @diffs=linediff(\@seq1, \@seq2)
# Line-by-line diff of two arrays of lines.  Returns the lines of seq2 with the changes marked up
# like this
#%DIF mm-mmdnn
#%DIF < old deleted line(s)
#%DIF -------
#%DIF mmann-nn
#new appended line %DIF >
#%DIF -------
# i.e. each block of changes is preceded by a header line, deleted lines are prefixed with "%DIF < ",
# added lines are suffixed with " %DIF > ", and each block is closed by "%DIF -------" (the same
# separator is placed between the deleted and the added lines of a changed block).
# This also holds for a block of changes at the very end of the sequences.
# Lines are compared after collapsing runs of white space, such that insertion or deletion of
# redundant white space is not reported.
# In list context the marked up lines are returned as list, otherwise as array reference.
# Future extension: mark change explicitly
# Assumes: traverse_sequence traverses deletions before insertions in changed sequences
#          all line numbers relative to line 0 (first line of real file)
sub linediff {
  my $seq1 = shift ;
  my $seq2 = shift ;

  my $block = [];           # lines of the change block currently being collected
  my $retseq = [];          # output lines
  my @begin=('','','');     # (type, index in seq1, index in seq2) at start of current block; dummy initialisation

  # $flush->($idx1,$idx2): write out the pending change block (if any) with header and trailer.
  # $idx1,$idx2 are the indices in seq1 and seq2 of the first line following the block
  my $flush = sub {
    my ($end1,$end2)=@_;
    return unless scalar @$block;
    my $header;
    if ( $begin[0] eq 'd' && $begin[1]!=$end1-1) {
      # several lines deleted
      $header = sprintf "%%DIF %d-%dd%d",$begin[1],$end1-1,$begin[2]; }
    elsif ( $begin[0] eq 'a' && $begin[2]!=$end2-1) {
      # several lines added
      $header = sprintf "%%DIF %da%d-%d",$begin[1],$begin[2],$end2-1; }
    elsif ( $begin[0] eq 'c' ) {
      # changed lines: ranges are only printed where more than one line is involved
      $header = sprintf "%%DIF %sc%s",
                         ($begin[1]==$end1-1) ? "$begin[1]" : $begin[1]."-".($end1-1)  ,
                         ($begin[2]==$end2-1) ? "$begin[2]" : $begin[2]."-".($end2-1)  ; }
    else {
      # single line added or deleted
      $header = sprintf "%%DIF %d%s%d",$begin[1],$begin[0],$begin[2]; }
    push @$retseq, $header,@$block, "%DIF -------" ;
    $block = [];
  };

  # callbacks for traverse_sequences; they are called with the current indices in seq1 and seq2
  # line of seq1 which is not in seq2
  my $discard = sub {
    my ($idx1,$idx2)=@_;
    @begin=('d',$idx1,$idx2) unless scalar @$block ;
    push(@$block, "%DIF < " . $seq1->[$idx1]);
  };
  # line of seq2 which is not in seq1
  my $add = sub {
    my ($idx1,$idx2)=@_;
    if (! scalar  @$block) {
      @begin=('a',$idx1,$idx2) ;
    } elsif ( $begin[0] eq 'd' ) {
      # additions directly following deletions: this is a changed block
      $begin[0]='c'; $begin[2]=$idx2;
      push(@$block, "%DIF -------");
    }
    push(@$block,  $seq2->[$idx2] . " %DIF > " );
  };
  # identical line: finish pending block, then copy line
  my $match = sub {
    my ($idx1,$idx2)=@_;
    $flush->($idx1,$idx2);
    push @$retseq, $seq2->[$idx2];
  };
  # key function: remove multiple spaces (such that insertion or deletion of redundant white space is not reported)
  my $keyfunc = sub { join("  ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );
  # a block of changes at the very end is not followed by a match: finish it here, the lines
  # 'following' the block are the (non-existent) lines just beyond the end of both sequences
  $flush->(scalar @$seq1, scalar @$seq2);

  return wantarray ? @$retseq : $retseq ;
}
3746
3747
3748
# init_regex_arr_data(\@array,"TOKEN")
# scans DATA file handle for the line "%%BEGIN TOKEN",
# then appends each following line not beginning with % to array, until the line "%%END TOKEN"
# (or the end of the data) is reached.  Afterwards the DATA handle is rewound to the file begin,
# such that the function can be called repeatedly for different tokens.
# This is used for command lists and configuration variables, but the processing is slightly
# different:
# For lists (TOKEN contains the word COMMANDS), each line is stored as a regular expression, extended
# to include beginning (^) and end ($) markers, to require full-string matching
# For configuration variables (and all others), simply an unadorned list of strings is copied
# The global $_ of the caller is left untouched.
sub init_regex_arr_data {
  my ($arr,$token)=@_;
  my $copy=0;     # true while between the BEGIN and END lines of the requested section
  my $iscmdlist = ( $token =~ m/COMMANDS/ );   # true: reading command list, false: reading configuration variables

  # read into a lexical variable, as 'while (<DATA>)' would overwrite the caller's $_
  while ( defined(my $line=<DATA>) ) {
    if ( $line =~ m/^%%BEGIN $token\s*$/ ) {
      $copy=1;
      next;
    } elsif ( $line =~ m/^%%END $token\s*$/ )  {
      last;
    }
    next unless $copy;
    chomp $line;
    next if $line =~ /^%/;     # comment line within section
#      print STDERR "DEBUG init_regex_arr_data regex >$line<\n" if $debug;
    if ( $iscmdlist ) {
      push (@$arr,qr/^$line$/);
    } else {
      push (@$arr,"$line");
    }
  }
  seek DATA,0,0;    # rewind DATA handle to file begin
}
3782
3783
# init_regex_arr_ext(\@array,$arg)
# appends regular expressions to array.
# If arg is the name of an existing plain file, the regular expressions are read from
# that file (one expression per line, see init_regex_arr_file).
# Otherwise arg is treated as a comma separated list of regular expressions
# (see init_regex_arr_list).
sub init_regex_arr_ext {
  my ($arr,$arg)=@_;
  # existing file: read the expressions from it
  return init_regex_arr_file($arr,$arg) if ( -f $arg );
  # anything else is taken to be the list itself
  return init_regex_arr_list($arr,$arg);
}
3797
# init_regex_arr_file(\@array,$fname)
# appends regular expressions to array.
# Read in list of regular expressions from file $fname (one expression per line).
# Empty lines and comment lines (first non-blank character # or %) are ignored.
# Each expression is extended to include beginning (^) and end ($) markers, to require
# full-string matching.
# Dies if the file cannot be opened.  The global $_ of the caller is left untouched.
sub init_regex_arr_file {
  my ($arr,$fname)=@_;
  # three-argument open with lexical handle: the file name is never interpreted as open mode or
  # command, and no global file handle is clobbered
  open(my $fh,'<',$fname) or die ("Couldn't open $fname: $!");
  while ( defined(my $line=<$fh>) ) {
    chomp $line;
    next if $line =~ /^\s*#/ || $line =~ /^\s*%/ || $line =~ /^\s*$/ ;
    push (@$arr,qr/^$line$/);
  }
  close($fh);
}
3812
# init_regex_arr_list(\@array,$arg)
# appends regular expressions to array.
# $arg is a comma separated list of regular expressions; a literal comma within an
# expression has to be escaped as "\,".
# Each expression is extended to include beginning (^) and end ($) markers, to require
# full-string matching.
sub init_regex_arr_list {
  my ($arr,$arg)=@_;
  ###    print STDERR "DEBUG init_regex_arr_list arg >$arg<\n" if $debug;
  # split at commas which are not preceded by a backslash
  my @items=split(qr/(?<!\\),/,$arg);
  foreach my $item (@items) {
    # reinstate escaped commas as plain commas
    (my $pattern=$item) =~ s/\\,/,/g;
    print STDERR "DEBUG init_regex_arr_list regex >$pattern<\n" if $debug;
    push (@$arr,qr/^$pattern$/);
  }
}
3826
3827
#exetime() returns time since last execution of this command
#exetime(1) resets this time (any true argument requests the reset)
# The time used is the value of times() in scalar context.  Before the first reset the
# reference time is -1.
my $lasttime=-1;   # global variable for persistence
sub exetime {
  my $reset = ( scalar(@_) >= 1 ) ? shift : 0;

  # reset: only remember the current time as new reference
  return $lasttime=times() if $reset;

  # otherwise report the time elapsed since the reference, and advance the reference accordingly
  my $elapsed=times()-$lasttime;
  $lasttime=$lasttime+$elapsed;
  return($elapsed);
}
3846
3847
3848sub usage {
3849  die <<"EOF";
3850Usage: $0 [options] old.tex new.tex > diff.tex
3851
3852Compares two latex files and writes tex code to stdout, which has the same format as new.tex but
3853has all changes relative to old.tex marked up or commented. Note that old.tex and new.tex need to
3854be real files (not pipes or similar) as they are opened twice.
3855
3856--type=markupstyle
3857-t markupstyle         Add code to preamble for selected markup style
3858                       Available styles: UNDERLINE CTRADITIONAL TRADITIONAL CFONT FONTSTRIKE INVISIBLE
3859                                         CHANGEBAR CCHANGEBAR CULINECHBAR CFONTCHBAR BOLD PDFCOMMENT
3860                       [ Default: UNDERLINE ]
3861
3862--subtype=markstyle
3863-s markstyle           Add code to preamble for selected style for bracketing
3864                       commands (e.g. to mark changes in  margin)
3865                       Available styles: SAFE MARGIN DVIPSCOL COLOR ZLABEL ONLYCHANGEDPAGE (LABEL)*
3866                       [ Default: SAFE ]
3867                       * LABEL subtype is deprecated
3868
3869--floattype=markstyle
3870-f markstyle           Add code to preamble for selected style which
3871                       replace standard marking and markup commands within floats
3872                       (e.g., marginal remarks cause an error within floats
3873                       so marginal marking can be disabled thus)
3874                       Available styles: FLOATSAFE IDENTICAL
3875                       [ Default: FLOATSAFE ]
3876
3877--encoding=enc
3878-e enc                 Specify encoding of old.tex and new.tex. Typical encodings are
3879                       ascii, utf8, latin1, latin9.  A list of available encodings can be
3880                       obtained by executing
3881                       perl -MEncode -e 'print join ("\\n",Encode->encodings( ":all" )) ;'
3882                       [Default encoding is utf8 unless the first few lines of the preamble contain
3883                       an invocation "\\usepackage[..]{inputenc} in which case the
3884                       encoding chosen by this command is asssumed. Note that ASCII (standard
3885                       latex) is a subset of utf8]
3886
3887--preamble=file
3888-p file                Insert file at end of preamble instead of auto-generating
3889                       preamble.  The preamble must define the following commands
3890                       \\DIFaddbegin,\\DIFaddend,\\DIFadd{..},
3891                       \\DIFdelbegin,\\DIFdelend,\\DIFdel{..},
3892                       and varieties for use within floats
3893                       \\DIFaddbeginFL,\\DIFaddendFL,\\DIFaddFL{..},
3894                       \\DIFdelbeginFL,\\DIFdelendFL,\\DIFdelFL{..}
3895                       (If this option is set -t, -s, and -f options
3896                       are ignored.)
3897
3898--exclude-safecmd=exclude-file
3899--exclude-safecmd="cmd1,cmd2,..."
3900-A exclude-file
3901--replace-safecmd=replace-file
3902--append-safecmd=append-file
3903--append-safecmd="cmd1,cmd2,..."
3904-a append-file         Exclude from, replace or append to the list of regex
3905                       matching commands which are safe to use within the
3906                       scope of a \\DIFadd or \\DIFdel command.  The file must contain
3907                       one Perl-RegEx per line (Comment lines beginning with # or % are
3908                       ignored). A literal comma within the comma-separated list must be
3909                       escaped thus "\\,",   Note that the RegEx needs to match the whole of
3910                       the token, i.e., /^regex\$/ is implied and that the initial
3911                       "\\" of the command is not included. The --exclude-safecmd
3912                       and --append-safecmd options can be combined with the --replace-safecmd
3913                       option and can be used repeatedly to add cumulatively to the lists.
3914
3915--exclude-textcmd=exclude-file
3916--exclude-textcmd="cmd1,cmd2,..."
3917-X exclude-file
3918--replace-textcmd=replace-file
3919--append-textcmd=append-file
3920--append-textcmd="cmd1,cmd2,..."
3921-x append-file         Exclude from, replace or append to the list of regex
3922                       matching commands whose last argument is text.  See
3923                       entry for --exclude-safecmd directly above for further details.
3924
3925--replace-context1cmd=replace-file
3926--append-context1cmd=append-file
3927--append-context1cmd="cmd1,cmd2,..."
3928                       Replace or append to the list of regex matching commands
3929                       whose last argument is text but which require a particular
3930                       context to work, e.g. \\caption will only work within a figure
3931                       or table.  These commands behave like text commands, except when
3932                       they occur in a deleted section, when they are disabled, but their
3933                       argument is shown as deleted text.
3934
3935--replace-context2cmd=replace-file
3936--append-context2cmd=append-file
3937--append-context2cmd="cmd1,cmd2,..."
3938                       As corresponding commands for context1.  The only difference is that
3939                       context2 commands are completely disabled in deleted sections, including
3940                       their arguments.
3941                       context2 commands are also the only commands in the preamble, whose argument will
3942                       be processed in word-by-word mode (which only works, if they occur no more than
3943		       once in the preamble).
3944
3945--exclude-mboxsafecmd=exclude-file
3946--exclude-mboxsafecmd="cmd1,cmd2,..."
3947--append-mboxsafecmd=append-file
3948--append-mboxsafecmd="cmd1,cmd2,..."
3949                       Define safe commands, which additionally need to be protected by encapsulating
3950                       in an \\mbox{..}. This is sometimes needed to get around incompatibilities
3951                       between external packages and the ulem package, which is  used for highlighting
3952                       in the default style UNDERLINE as well as CULINECHBAR CFONTSTRIKE
3953
3954
3955
3956--config var1=val1,var2=val2,...
3957-c var1=val1,..        Set configuration variables.
3958-c configfile           Available variables:
3959                          ARRENV (RegEx)
3960                          COUNTERCMD (RegEx)
3961                          FLOATENV (RegEx)
3962                          ITEMCMD (RegEx)
3963                          LISTENV (RegEx)
3964                          MATHARRENV (RegEx)
3965                          MATHARRREPL (String)
3966                          MATHENV (RegEx)
3967                          MATHREPL (String)
3968                          MINWORDSBLOCK (Integer)
3969                          PICTUREENV (RegEx)
3970                          SCALEDELGRAPHICS (Float)
3971                          VERBATIMENV (RegEx)
3972                          VERBATIMLINEENV (RegEx)
3973                          CUSTOMDIFCMD (RegEx)
3974                       This option can be repeated.
3975
3976--add-to-config  varenv1=pattern1,varenv2=pattern2
3977                       For configuration variables containing a regular expression (essentially those ending
3978                       in ENV, and COUNTERCMD) this provides an alternative way to modify the configuration
3979                       variables. Instead of setting the complete pattern, with this option it is possible to add an
3980                       alternative pattern. varenv must be one of the variables listed above that take a regular
3981                       expression as argument, and pattern is any regular expression (which might need to be
3982                       protected from the shell by quotation). Several patterns can be added at once by using semi-colons
3983                       to separate them, e.g. --add-to-config "LISTENV=myitemize;myenumerate,COUNTERCMD=endnote"
3984
3985--packages=pkg1,pkg2,..
3986                       Tell latexdiff that .tex file is processed with the packages in list
3987                       loaded.  This is normally not necessary if the .tex file includes the
3988                       preamble, as the preamble is automatically scanned for \\usepackage commands.
3989                       Use of the --packages option disables automatic scanning, so if for any
3990                       reason package specific parsing needs to be switched off, use --packages=none.
3991                       The following packages trigger special behaviour:
3992                       endfloat hyperref amsmath apacite siunitx cleveref glossaries mhchem chemformula/chemmacros
3993                       biblatex
3994                       [ Default: scan the preamble for \\usepackage commands to determine
3995                         loaded packages.]
3996
3997--show-preamble        Print generated or included preamble commands to stdout.
3998
3999--show-safecmd         Print list of regex matching and excluding safe commands.
4000
4001--show-textcmd         Print list of regex matching and excluding commands with text argument.
4002
4003--show-config          Show values of configuration variables
4004
4005--show-all             Show all of the above
4006
4007   NB For all --show commands, no old.tex or new.tex file needs to be given, and no
4008      differencing takes place.
4009
4010Other configuration options:
4011
4012--allow-spaces         Allow spaces between bracketed or braced arguments to commands
4013                       [Default requires arguments to directly follow each other without
4014                                intervening spaces]
4015
4016--math-markup=level    Determine granularity of markup in displayed math environments:
4017                      Possible values for level are (both numerical and text labels are acceptable):
4018                      off or 0: suppress markup for math environments.  Deleted equations will not
4019                               appear in diff file. This mode can be used if all the other modes
4020                               cause invalid latex code.
4021                      whole or 1: Differencing on the level of whole equations. Even trivial changes
4022                               to equations cause the whole equation to be marked changed.  This
4023                               mode can be used if processing in coarse or fine mode results in
4024                               invalid latex code.
4025                      coarse or 2: Detect changes within equations marked up with a coarse
4026                               granularity; changes in equation type (e.g.displaymath to equation)
4027                               appear as a change to the complete equation. This mode is recommended
4028                               for situations where the content and order of some equations are still
4029                               being changed. [Default]
4030                      fine or 3: Detect small change in equations and mark up and fine granularity.
4031                               This mode is most suitable, if only minor changes to equations are
4032                               expected, e.g. correction of typos.
4033
4034--graphics-markup=level   Change highlight style for graphics embedded with \\includegraphics commands
4035                      Possible values for level:
4036                      none,off or 0: no highlighting for figures
4037                      new-only or 1: surround newly added or changed figures with a blue frame [Default]
4038                      both or 2:     highlight new figures with a blue frame and show deleted figures
4039                                at reduced scale, and crossed out with a red diagonal cross. Use configuration
4040                                variable SCALEDELGRAPHICS to set size of deleted figures.
4041                      Note that changes to the optional parameters will make the figure appear as changed
4042                      to latexdiff, and this figure will thus be highlighted.
4043
4044--disable-citation-markup
4045--disable-auto-mbox    Suppress citation markup and markup of other vulnerable commands in styles
4046                       using ulem (UNDERLINE,FONTSTRIKE, CULINECHBAR)
4047                       (the two options are identical and are simply aliases)
4048
4049--enable-citation-markup
4050--enforce-auto-mbox    Protect citation commands and other vulnerable commands in changed sections
4051                       with \\mbox command, i.e. use default behaviour for ulem package for other packages
4052                       (the two options are identical and are simply aliases)
4053
4054Miscelleneous options
4055
4056--label=label
4057-L label               Sets the labels used to describe the old and new files.  The first use
4058                       of this option sets the label describing the old file and the second
4059                       use of the option sets the label for the new file.
4060                       [Default: use the filename and modification dates for the label]
4061
4062--no-label             Suppress inclusion of old and new file names as comment in output file
4063
4064--visible-label         Include old and new filenames (or labels set with --label option) as
4065                       visible output
4066
4067--flatten              Replace \\input and \\include commands within body by the content
4068                       of the files in their argument.  If \\includeonly is present in the
4069                       preamble, only those files are expanded into the document. However,
4070                       no recursion is done, i.e. \\input and \\include commands within
4071                       included sections are not expanded.  The included files are assumed to
4072                       be located in the same directories as the old and new master files,
4073                       respectively, making it possible to organise files into old and new directories.
4074                       --flatten is applied recursively, so inputted files can contain further
4075                       \\input statements.  Also handles files included by the import package
4076                       (\\import and \\subimport), and \\subfile command.
4077
4078--filter-script=filterscript    Run files through this filterscript (full path preferred) before processing.
4079                       The filterscript must take STDIN input and output to STDOUT.
4080                       When coupled with --flatten, each file will be run through the filter as it is brought in.
4081
4082--ignore-filter-stderr When running with --filter-script, STDERR from the script may cause readability issues.
4083                       Turn this flag on to ignore STDERR from the filter script.
4084
4085
4086
4087--help
4088-h                     Show this help text.
4089
4090--ignore-warnings      Suppress warnings about inconsistencies in length between input
4091                       and parsed strings and missing characters.
4092
4093--verbose
4094-V                     Output various status information to stderr during processing.
4095                       Default is to work silently.
4096
4097--version              Show version number.
4098
4099Internal options:
4100These options are mostly for automated use by latexdiff-vc. They can be used directly, but
4101the API should be considered less stable than for the other options.
4102
4103--no-links             Suppress generation of hyperreferences, used for minimal diffs
4104                       (option --only-changes of latexdiff-vc).
4105EOF
4106}
4107
4108=head1 NAME
4109
4110latexdiff - determine and markup differences between two latex files
4111
4112=head1 SYNOPSIS
4113
4114B<latexdiff> [ B<OPTIONS> ] F<old.tex> F<new.tex> > F<diff.tex>
4115
4116=head1 DESCRIPTION
4117
4118Briefly, I<latexdiff> is a utility program to aid in the management of
4119revisions of latex documents. It compares two valid latex files, here
4120called C<old.tex> and C<new.tex>, finds significant differences
4121between them (i.e., ignoring the number of white spaces and position
4122of line breaks), and adds special commands to highlight the
4123differences.  Where visual highlighting is not possible, e.g. for changes
4124in the formatting, the differences are
4125nevertheless marked up in the source. Note that old.tex and new.tex need to
4126be real files (not pipes or similar) as they are opened twice (unless the C<--encoding> option is used).
4127
4128The program treats the preamble differently from the main document.
4129Differences between the preambles are found using line-based
4130differencing (similarly to the Unix diff command, but ignoring white
4131spaces).  A comment, "S<C<%DIF E<gt>>>" is appended to each added line, i.e. a
4132line present in C<new.tex> but not in C<old.tex>.  Discarded lines
4133 are deactivated by prepending "S<C<%DIF E<lt>>>". Changed blocks are preceded  by
4134comment lines giving information about line numbers in the original files.  Where there are insignificant
4135differences, the resulting file C<diff.tex> will be similar to
4136C<new.tex>.  At the end of the preamble, the definitions for I<latexdiff> markup commands are inserted.
4137In differencing the main body of the text, I<latexdiff> attempts to
4138satisfy the following guidelines (in order of priority):
4139
4140=over 3
4141
4142=item 1
4143
4144If both C<old.tex> and C<new.tex> are valid LaTeX, then the resulting
4145C<diff.tex> should also be valid LaTeX. (NB If a few plain TeX commands
4146are used within C<old.tex> or C<new.tex> then C<diff.tex> is not
4147guaranteed to work but usually will).
4148
4149=item 2
4150
4151Significant differences are determined on the level of
4152individual words. All significant differences, including differences
4153between comments should be clearly marked in the resulting source code
4154C<diff.tex>.
4155
4156=item 3
4157
4158If a changed passage contains text or text-producing commands, then
4159running C<diff.tex> through LaTeX should produce output where added
4160and discarded passages are highlighted.
4161
4162=item 4
4163
4164Where there are insignificant differences, e.g. in the positioning of
4165line breaks, C<diff.tex> should follow the formatting of C<new.tex>
4166
4167=back
4168
4169For differencing the same algorithm as I<diff> is used but words
4170instead of lines are compared.  An attempt is made to recognize
4171blocks which are completely changed such that they can be marked up as a unit.
4172Comments are differenced line by line
4173but the number of spaces within comments is ignored. Commands including
4174all their arguments are generally compared as one unit, i.e., no mark-up
4175is inserted into the arguments of commands.  However, for a selected
4176number of commands (for example, C<\caption> and all sectioning
4177commands) the last argument is known to be text. This text is
4178split into words and differenced just as ordinary text (use options to
4179show and change the list of text commands, see below). As the
4180algorithm has no detailed knowledge of LaTeX, it assumes all pairs of
4181curly braces immediately following a command (i.e. a sequence of
4182letters beginning with a backslash) are arguments for that command.
4183As a restriction to condition 1 above it is thus necessary to surround
4184all arguments with curly braces, and to not insert
4185extraneous spaces.  For example, write
4186
4187  \section{\textem{This is an emphasized section title}}
4188
4189and not
4190
4191  \section {\textem{This is an emphasized section title}}
4192
4193or
4194
4195  \section\textem{This is an emphasized section title}
4196
4197even though all varieties are the same to LaTeX (but see
4198B<--allow-spaces> option which allows the second variety).
4199
4200For environments whose content does not conform to standard LaTeX or
4201where graphical markup does not make sense all markup commands can be
4202removed by setting the PICTUREENV configuration variable (set by
4203default to C<picture> and C<DIFnomarkup> environments; see B<--config>
4204option).  The latter environment (C<DIFnomarkup>) can be used to
4205protect parts of the latex file where the markup results in illegal
4206markup. You have to surround the offending passage in both the old and
4207new file by C<\begin{DIFnomarkup}> and C<\end{DIFnomarkup}>. You must
4208define the environment in the preambles of both old and new
4209documents. I prefer to define it as a null-environment,
4210
4211C<\newenvironment{DIFnomarkup}{}{}>
4212
4213but the choice is yours.  Any markup within the environment will be
4214removed, and generally everything within the environment will just be
4215taken from the new file.
4216
4217It is also possible to difference files which do not have a preamble.
4218 In this case, the file is processed in the main document
4219mode, but the definitions of the markup commands are not inserted.
4220
4221All markup commands inserted by I<latexdiff> begin with "C<\DIF>".  Added
4222blocks containing words, commands or comments which are in C<new.tex>
4223but not in C<old.tex> are marked by C<\DIFaddbegin> and C<\DIFaddend>.
4224Discarded blocks are marked by C<\DIFdelbegin> and C<\DIFdelend>.
4225Within added blocks all text is highlighted with C<\DIFadd> like this:
4226C<\DIFadd{Added text block}>
4227Selected `safe' commands can be contained in these text blocks as well
4228(use options to show and change the list of safe commands, see below).
4229All other commands as well as braces "{" and "}" are never put within
4230the scope of C<\DIFadd>.  Added comments are marked by prepending
4231"S<C<%DIF E<gt> >>".
4232
4233Within deleted blocks text is highlighted with C<\DIFdel>.  Deleted
4234comments are marked by prepending "S<C<%DIF E<lt> >>".  Non-safe command
4235and curly braces within deleted blocks are commented out with
4236"S<C<%DIFDELCMD E<lt> >>".
4237
4238
4239
4240=head1 OPTIONS
4241
4242=head2 Preamble
4243
4244The following options determine the visual markup style by adding the appropriate
4245command definitions to the preamble. See the end of this section for a description of
4246available styles.
4247
4248=over 4
4249
4250=item B<--type=markupstyle> or
4251B<-t markupstyle>
4252
4253Add code to preamble for selected markup style. This option defines
4254C<\DIFadd> and C<\DIFdel> commands.
4255Available styles:
4256
4257C<UNDERLINE CTRADITIONAL TRADITIONAL CFONT FONTSTRIKE INVISIBLE
4258CHANGEBAR CCHANGEBAR CULINECHBAR CFONTCHBAR BOLD PDFCOMMENT>
4259
4260[ Default: C<UNDERLINE> ]
4261
4262=item B<--subtype=markstyle> or
4263B<-s markstyle>
4264
4265Add code to preamble for selected style for bracketing
4266commands (e.g. to mark changes in the margin). This option defines
4267C<\DIFaddbegin>, C<\DIFaddend>, C<\DIFdelbegin> and C<\DIFdelend> commands.
4268Available styles: C<SAFE MARGIN COLOR DVIPSCOL  ZLABEL ONLYCHANGEDPAGE (LABEL)*>
4269
4270[ Default: C<SAFE> ]
4271* Subtype C<LABEL> is deprecated
4272
4273=item B<--floattype=markstyle> or
4274B<-f markstyle>
4275
4276Add code to preamble for selected style which
4277replaces standard marking and markup commands within floats
4278(e.g., marginal remarks cause an error within floats
4279so marginal marking can be disabled thus). This option defines all
4280C<\DIF...FL> commands.
4281Available styles: C<FLOATSAFE TRADITIONALSAFE IDENTICAL>
4282
4283[ Default: C<FLOATSAFE> ]
4284
4285=item B<--encoding=enc> or
4286B<-e enc>
4287
4288Specify encoding of old.tex and new.tex. Typical encodings are
4289C<ascii>, C<utf8>, C<latin1>, C<latin9>.  A list of available encodings can be
4290obtained by executing
4291
4292C<< perl -MEncode -e 'print join ("\n",Encode->encodings( ":all" )) ;' >>
4293
4294If this option is used, then old.tex, new.tex are only opened once.
4295[Default encoding is utf8 unless the first few lines of the preamble contain
4296an invocation C<\usepackage[..]{inputenc}> in which case the
4297encoding chosen by this command is assumed. Note that ASCII (standard
4298latex) is a subset of utf8]
4299
4300=item B<--preamble=file> or
4301B<-p file>
4302
4303Insert file at end of preamble instead of generating
4304preamble.  The preamble must define the following commands
4305C<\DIFaddbegin, \DIFaddend, \DIFadd{..},
4306\DIFdelbegin,\DIFdelend,\DIFdel{..},>
4307and varieties for use within floats
4308C<\DIFaddbeginFL, \DIFaddendFL, \DIFaddFL{..},
4309\DIFdelbeginFL, \DIFdelendFL, \DIFdelFL{..}>
4310(If this option is set, the B<-t>, B<-s>, and B<-f> options
4311are ignored.)
4312
4313=item B<--packages=pkg1,pkg2,..>
4314
4315Tell latexdiff that .tex file is processed with the packages in list
4316loaded.  This is normally not necessary if the .tex file includes the
4317preamble, as the preamble is automatically scanned for C<\usepackage> commands.
4318Use of the B<--packages> option disables automatic scanning, so if for any
4319reason package specific parsing needs to be switched off, use B<--packages=none>.
4320The following packages trigger special behaviour:
4321
4322=over 8
4323
4324=item C<amsmath>
4325
4326Configuration variable MATHARRREPL is set to C<align*> (Default: C<eqnarray*>). (Note that many of the
4327amsmath array environments are already recognised by default as such)
4328
4329=item C<endfloat>
4330
4331Ensure that C<\begin{figure}> and C<\end{figure}> always appear by themselves on a line.
4332
4333=item C<hyperref>
4334
4335Change name of C<\DIFadd> and C<\DIFdel> commands to C<\DIFaddtex> and C<\DIFdeltex> and
4336define new C<\DIFadd> and C<\DIFdel> commands, which provide a wrapper for these commands,
4337using them for the text but not for the link defining command (where any markup would cause
4338errors).
4339
4340=item C<apacite>, C<biblatex>
4341
4342Redefine the commands recognised as citation commands.
4343
4344=item C<siunitx>
4345
4346Treat C<\SI> as equivalent to citation commands (i.e. protect with C<\mbox> if markup style uses ulem package).
4347
4348=item C<cleveref>
4349
4350Treat C<\cref,\Cref>, etc as equivalent to citation commands (i.e. protect with C<\mbox> if markup style uses ulem package).
4351
4352=item C<glossaries>
4353
4354Define most of the glossaries commands as safe, protecting them with C<\mbox> commands where needed.
4355
4356=item C<mhchem>
4357
4358Treat C<\ce> as a safe command, i.e. it will be highlighted (note that C<\cee> will not be highlighted in equations as this leads to processing errors)
4359
4360=item C<chemformula> or C<chemmacros>
4361
4362Treat C<\ch> as a safe command outside equations, i.e. it will be highlighted (note that C<\ch> will not be highlighted in equations as this leads to processing errors)
4363
4364
4365=back
4366
4367[ Default: scan the preamble for C<\usepackage> commands to determine
4368  loaded packages. ]
4369
4370
4371
4372=item B<--show-preamble>
4373
4374Print generated or included preamble commands to stdout.
4375
4376=back
4377
4378=head2 Configuration
4379
4380=over 4
4381
4382=item B<--exclude-safecmd=exclude-file> or
4383B<-A exclude-file> or  B<--exclude-safecmd="cmd1,cmd2,...">
4384
4385=item B<--replace-safecmd=replace-file>
4386
4387=item B<--append-safecmd=append-file> or
4388B<-a append-file> or B<--append-safecmd="cmd1,cmd2,...">
4389
4390Exclude from, replace or append to the list of regular expressions (RegEx)
4391matching commands which are safe to use within the
4392scope of a C<\DIFadd> or C<\DIFdel> command.  The file must contain
4393one Perl-RegEx per line (Comment lines beginning with # or % are
4394ignored).  Note that the RegEx needs to match the whole of
4395the token, i.e., /^regex$/ is implied and that the initial
4396"\" of the command is not included.
4397The B<--exclude-safecmd> and B<--append-safecmd> options can be combined with the B<--replace-safecmd>
4398option and can be used repeatedly to add cumulatively to the lists.
4399 B<--exclude-safecmd>
4400and B<--append-safecmd> can also take a comma separated list as input. If a
4401comma for one of the regex is required, escape it thus "\,". In most cases it
4402will be necessary to protect the comma-separated list from the shell by putting
4403it in quotation marks.
4404
4405=item B<--exclude-textcmd=exclude-file> or
4406B<-X exclude-file> or B<--exclude-textcmd="cmd1,cmd2,...">
4407
4408=item B<--replace-textcmd=replace-file>
4409
4410=item B<--append-textcmd=append-file> or
4411B<-x append-file> or B<--append-textcmd="cmd1,cmd2,...">
4412
4413Exclude from, replace or append to the list of regular expressions
4414matching commands whose last argument is text.  See
4415entry for B<--exclude-safecmd> directly above for further details.
4416
4417
4418=item B<--replace-context1cmd=replace-file>
4419
4420=item B<--append-context1cmd=append-file> or
4421
4422=item B<--append-context1cmd="cmd1,cmd2,...">
4423
4424Replace or append to the list of regex matching commands
4425whose last argument is text but which require a particular
4426context to work, e.g. C<\caption> will only work within a figure
4427or table.  These commands behave like text commands, except when
4428they occur in a deleted section, when they are disabled, but their
4429argument is shown as deleted text.
4430
4431=item B<--replace-context2cmd=replace-file>
4432
4433=item B<--append-context2cmd=append-file> or
4434
4435=item B<--append-context2cmd="cmd1,cmd2,...">
4436
4437As corresponding commands for context1.  The only difference is that
4438context2 commands are completely disabled in deleted sections, including
4439their arguments.
4440
4441context2 commands are also the only commands in the preamble, whose argument will be processed in
4442word-by-word mode (which only works, if they occur no more than once in the preamble). The algorithm currently cannot cope with repeated context2 commands in the preamble, as they occur e.g. for the C<\author> argument in some journal styles (not in the standard styles, though).
4443If such a repetition is detected, the whole preamble will be processed in line-by-line mode. In such a case, use C<--replace-context2cmd> option to just select the commands, which should be processed and are not used repeatedly in the preamble.
4444
4445
4446
4447=item B<--exclude-mboxsafecmd=exclude-file> or B<--exclude-mboxsafecmd="cmd1,cmd2,...">
4448
4449=item B<--append-mboxsafecmd=append-file> or B<--append-mboxsafecmd="cmd1,cmd2,...">
4450
4451Define safe commands, which additionally need to be protected by encapsulating
4452in an C<\mbox{..}>. This is sometimes needed to get around incompatibilities
4453between external packages and the ulem package, which is  used for highlighting
4454in the default style UNDERLINE as well as CULINECHBAR and FONTSTRIKE.
4455
4456
4457
4458
4459
4460=item B<--config var1=val1,var2=val2,...> or B<-c var1=val1,..>
4461
4462=item B<-c configfile>
4463
4464Set configuration variables.  The option can be repeated to set different
4465variables (as an alternative to the comma-separated list).
4466Available variables (see below for further explanations):
4467
4468C<ARRENV> (RegEx)
4469
4470C<COUNTERCMD> (RegEx)
4471
4472C<CUSTOMDIFCMD> (RegEx)
4473
4474C<FLOATENV> (RegEx)
4475
4476C<ITEMCMD> (RegEx)
4477
4478C<LISTENV>  (RegEx)
4479
4480C<MATHARRENV> (RegEx)
4481
4482C<MATHARRREPL> (String)
4483
4484C<MATHENV> (RegEx)
4485
4486C<MATHREPL> (String)
4487
4488C<MINWORDSBLOCK> (Integer)
4489
4490C<PICTUREENV> (RegEx)
4491
4492C<SCALEDELGRAPHICS> (Float)
4493
4494
4495=item B<--add-to-config varenv1=pattern1,varenv2=pattern2,...>
4496
4497For configuration variables, which are a regular expression (essentially those ending
4498in ENV, COUNTERCMD and CUSTOMDIFCMD, see list above) this option provides an alternative way to modify the configuration
4499variables. Instead of setting the complete pattern, with this option it is possible to add an
4500alternative pattern. C<varenv> must be one of the variables listed above that take a regular
4501expression as argument, and pattern is any regular expression (which might need to be
4502protected from the shell by quotation). Several patterns can be added at once by using semi-colons
4503to separate them, e.g. C<--add-to-config "LISTENV=myitemize;myenumerate,COUNTERCMD=endnote">
4504
4505=item B<--show-safecmd>
4506
4507Print list of RegEx matching and excluding safe commands.
4508
4509=item B<--show-textcmd>
4510
4511Print list of RegEx matching and excluding commands with text argument.
4512
4513=item B<--show-config>
4514
4515Show values of configuration variables.
4516
4517=item B<--show-all>
4518
4519Combine all --show commands.
4520
4521NB For all --show commands, no C<old.tex> or C<new.tex> file needs to be specified, and no
4522differencing takes place.
4523
4524=back
4525
4526=head2 Other configuration options:
4527
4528=over 4
4529
4530=item B<--allow-spaces>
4531
4532Allow spaces between bracketed or braced arguments to commands.  Note
4533that this option might have undesirable side effects (unrelated scope
4534might get lumped with preceding commands) so should only be used if the
4535default produces erroneous results.  (Default requires arguments to
4536directly follow each other without intervening spaces).
4537
4538=item B<--math-markup=level>
4539
4540Determine granularity of markup in displayed math environments:
4541Possible values for level are (both numerical and text labels are acceptable):
4542
4543C<off> or C<0>: suppress markup for math environments.  Deleted equations will not
4544appear in diff file. This mode can be used if all the other modes
4545cause invalid latex code.
4546
4547C<whole> or C<1>: Differencing on the level of whole equations. Even trivial changes
4548to equations cause the whole equation to be marked changed.  This
4549mode can be used if processing in coarse or fine mode results in
4550invalid latex code.
4551
4552C<coarse> or C<2>: Detect changes within equations marked up with a coarse
4553granularity; changes in equation type (e.g. displaymath to equation)
4554appear as a change to the complete equation. This mode is recommended
4555for situations where the content and order of some equations are still
4556being changed. [Default]
4557
4558C<fine> or C<3>: Detect small changes in equations and mark up at fine granularity.
4559This mode is most suitable, if only minor changes to equations are
4560expected, e.g. correction of typos.
4561
4562=item B<--graphics-markup=level>
4563
4564Change highlight style for graphics embedded with C<\includegraphics> commands.
4565
4566Possible values for level:
4567
4568C<none>, C<off> or C<0>: no highlighting for figures
4569
4570C<new-only> or C<1>: surround newly added or changed figures with a blue frame [Default if graphicx package loaded]
4571
4572C<both> or C<2>:     highlight new figures with a blue frame and show deleted figures at reduced
4573scale, and crossed out with a red diagonal cross. Use configuration
4574variable SCALEDELGRAPHICS to set size of deleted figures.
4575
4576Note that changes to the optional parameters will make the figure appear as changed
4577to latexdiff, and this figure will thus be highlighted.
4578
4579=item B<--disable-citation-markup> or B<--disable-auto-mbox>
4580
4581Suppress citation markup and markup of other vulnerable commands in styles
4582using ulem (UNDERLINE,FONTSTRIKE, CULINECHBAR)
4583(the two options are identical and are simply aliases)
4584
4585=item B<--enable-citation-markup> or B<--enforce-auto-mbox>
4586
4587Protect citation commands and other vulnerable commands in changed sections
4588with C<\mbox> command, i.e. use default behaviour for ulem package for other packages
4589(the two options are identical and are simply aliases)
4590
4591=back
4592
4593=head2 Miscellaneous
4594
4595=over 4
4596
4597=item B<--verbose> or B<-V>
4598
4599Output various status information to stderr during processing.
4600Default is to work silently.
4601
4602=item B<--driver=type>
4603
4604Choose driver for changebar package (only relevant for styles using
4605   changebar: CCHANGEBAR CFONTCHBAR CULINECHBAR CHANGEBAR). Possible
4606drivers are listed in changebar manual, e.g. pdftex,dvips,dvitops
4607  [Default: dvips]
4608
4609=item B<--ignore-warnings>
4610
4611Suppress warnings about inconsistencies in length between input and
4612parsed strings and missing characters.  These warning messages are
4613often related to non-standard latex or latex constructions with a
4614syntax unknown to C<latexdiff> but the resulting difference argument
4615is often fully functional anyway, particularly if the non-standard
4616latex only occurs in parts of the text which have not changed.
4617
4618=item B<--label=label> or
4619B<-L label>
4620
4621Sets the labels used to describe the old and new files.  The first use
4622of this option sets the label describing the old file and the second
4623use of the option sets the label for the new file, i.e. set both
4624labels like this C<-L labelold -L labelnew>.
4625[Default: use the filename and modification dates for the label]
4626
4627=item B<--no-label>
4628
4629Suppress inclusion of old and new file names as comment in output file
4630
4631=item B<--visible-label>
4632
4633Include old and new filenames (or labels set with C<--label> option) as
4634visible output.
4635
4636=item B<--flatten>
4637
4638Replace C<\input> and C<\include> commands within body by the content
4639of the files in their argument.  If C<\includeonly> is present in the
4640preamble, only those files are expanded into the document. However,
4641no recursion is done, i.e. C<\input> and C<\include> commands within
4642included sections are not expanded.  The included files are assumed to
4643 be located in the same directories as the old and new master files,
4644respectively, making it possible to organise files into old and new directories.
4645--flatten is applied recursively, so inputted files can contain further
4646C<\input> statements.  Also handles files included by the import package
4647(C<\import> and C<\subimport>), and C<\subfile> command.
4648
4649Use of this option might result in prohibitive processing times for
4650larger documents, and the resulting difference document
4651no longer reflects the structure of the input documents.
4652
4653=item B<--filter-script=filterscript>
4654
4655Run files through this filterscript (full path preferred) before processing.
4656The filterscript must take STDIN input and output to STDOUT.
4657When coupled with --flatten, each file will be run through the filter as it is brought in.
4658
4659=item B<--ignore-filter-stderr>
4660
4661When running with --filter-script, STDERR from the script may cause readability issues.
4662Turn this flag on to ignore STDERR from the filter script.
4663
4664
4665
4666=item B<--help> or
4667B<-h>
4668
4669Show help text
4670
4671=item B<--version>
4672
4673Show version number
4674
4675=back
4676
4677
4678=head2 Internal options
4679
4680These options are mostly for automated use by latexdiff-vc. They can be used directly, but the API should be considered less stable than for the other options.
4681
4682=over 4
4683
4684=item B<--no-links>
4685
4686Suppress generation of hyperreferences, used for minimal diffs (option --only-changes of latexdiff-vc)
4687
4688=back
4689
4690
4691=head2 Predefined styles
4692
4693=head2 Major types
4694
4695The major type determines the markup of plain text and some selected latex commands outside floats by defining the markup commands C<\DIFadd{...}> and C<\DIFdel{...}> .
4696
4697=over 10
4698
4699=item C<UNDERLINE>
4700
4701Added text is wavy-underlined and blue, discarded text is struck out and red
4702(Requires color and ulem packages).  Overstriking does not work in displayed math equations such that deleted parts of equation are underlined, not struck out (this is a shortcoming inherent to the ulem package).
4703
4704=item C<CTRADITIONAL>
4705
4706Added text is blue and set in sans-serif, and a red footnote is created for each discarded
4707piece of text. (Requires color package)
4708
4709=item C<TRADITIONAL>
4710
4711Like C<CTRADITIONAL> but without the use of color.
4712
4713=item C<CFONT>
4714
4715Added text is blue and set in sans-serif, and discarded text is red and very small size.
4716
4717=item C<FONTSTRIKE>
4718
4719Added text is set in sans-serif, discarded text small and struck out
4720
4721=item C<CCHANGEBAR>
4722
4723Added text is blue, and discarded text is red.  Additionally, the changed text is marked with a bar in the margin (Requires color and changebar packages).
4724
4725=item C<CFONTCHBAR>
4726
4727Like C<CFONT> but with additional changebars (Requires color and changebar packages).
4728
4729=item C<CULINECHBAR>
4730
4731Like C<UNDERLINE> but with additional changebars (Requires color, ulem and changebar packages).
4732
4733=item C<CHANGEBAR>
4734
4735No mark up of text, but mark margins with changebars (Requires changebar package).
4736
4737=item C<INVISIBLE>
4738
4739No visible markup (but generic markup commands will still be inserted).
4740
4741=item C<BOLD>
4742
4743Added text is set in bold face, discarded is not shown.
4744
4745=item C<PDFCOMMENT>
4746
4747The pdfcomment package is used to underline new text, and mark deletions with a PDF comment. Note that this markup might appear differently or not at all based on the pdf viewer used. The viewer with best support for pdf markup is probably acroread. This style is only recommended if the number of differences is small.
4748
4749=back
4750
4751=head2 Subtypes
4752
4753The subtype defines the commands that are inserted at the begin and end of added or discarded blocks, irrespectively of whether these blocks contain text or commands (Defined commands: C<\DIFaddbegin, \DIFaddend, \DIFdelbegin, \DIFdelend>)
4754
4755=over 10
4756
4757=item C<SAFE>
4758
4759No additional markup (Recommended choice)
4760
4761=item C<MARGIN>
4762
4763Mark beginning and end of changed blocks with symbols in the margin nearby (using
4764the standard C<\marginpar> command - note that this sometimes moves somewhat
4765from the intended position).
4766
4767=item C<COLOR>
4768
4769An alternative way of marking added passages in blue, and deleted ones in red.
4770(It is recommended to use instead the main types to effect colored markup,
4771although in some cases coloring with dvipscol can be more complete, for example
4772with citation commands).
4773
4774=item C<DVIPSCOL>
4775
4776An alternative way of marking added passages in blue, and deleted ones in red. Note
4777that C<DVIPSCOL> only works with the dvips converter, e.g. not pdflatex.
4778(it is recommended to use instead the main types to effect colored markup,
4779although in some cases coloring with dvipscol can be more complete).
4780
4781
4782=item C<ZLABEL>
4783
4784Can be used to highlight only changed pages, but requires post-processing. It is recommended not to call this option manually but to use C<latexdiff-vc> with the C<--only-changes> option. Alternatively, use the script given within the preamble of diff files made using this style.
4785
4786=item C<ONLYCHANGEDPAGE>
4787
4788also highlights changed pages, without the need for post-processing, but might not work reliably if
4789there is floating material (figures, tables).
4790
4791=item C<LABEL>
4792
4793is similar to C<ZLABEL>, but does not need the zref package and works less reliably (deprecated).
4794
4795=back
4796
4797=head2 Float Types
4798
4799Some of the markup used in the main text might cause problems when used within
4800floats (e.g. figures or tables).  For this reason alternative versions of all
4801markup commands are used within floats. The float type defines these alternative commands.
4802
4803=over 10
4804
4805=item C<FLOATSAFE>
4806
4807Use identical markup for text as in the main body, but set all commands marking the begin and end of changed blocks to null-commands.  You have to choose this float type if your subtype is C<MARGIN> as C<\marginpar> does not work properly within floats.
4808
4809=item C<TRADITIONALSAFE>
4810
4811Mark additions the same way as in the main text.  Deleted environments are marked by angular brackets \[ and \] and the deleted text is set in scriptscript size. This float type should always be used with the C<TRADITIONAL> and  C<CTRADITIONAL> markup types as the \footnote command does not work properly in floating environments.
4812
4813=item C<IDENTICAL>
4814
4815Make no difference between the main text and floats.
4816
4817=back
4818
4819
4820=head2 Configuration Variables
4821
4822=over 10
4823
4824=item C<ARRENV>
4825
4826If a match to C<ARRENV> is found within an inline math environment within a deleted or added block, then the inlined math
4827is surrounded by C<\mbox{>...C<}>.  This is necessary as underlining does not work within inlined array environments.
4828
4829[ Default: C<ARRENV>=S<C<(?:array|[pbvBV]matrix)> >]
4830
4831=item C<COUNTERCMD>
4832
4833If a command in a deleted block which is also in the textcmd list matches C<COUNTERCMD> then an
4834additional command C<\addtocounter{>F<cntcmd>C<}{-1}>, where F<cntcmd> is the matching command, is appended in the diff file such that the numbering in the diff file remains synchronized with the
4835numbering in the new file.
4836
4837[ Default: C<COUNTERCMD>=C<(?:footnote|part|chapter|section|subsection> ...
4838
4839C<|subsubsection|paragraph|subparagraph)>  ]
4840
4841=item C<CUSTOMDIFCMD>
4842
4843This option is for advanced users and allows definition of special versions of commands, which do not work as safe commands.
4844
4845Commands in C<CUSTOMDIFCMD> that occur in added or deleted blocks will be given an ADD or DEL prefix.
4846The prefixed versions of the command must be defined in the preamble, either by putting them
4847in the preamble of at least the new file, or by creating a custom preamble file (Option --preamble).
4848For example the command C<\blindtext> (from package blindtext) does not interact well with underlining, so that
4849for the standard markup type, it is not satisfactory to define it as a safe command. Instead, a customised versions
4850without underlining can be defined in the preamble:
4851
4852C<\newcommand{\DELblindtext}{{\color{red}\blindtext}}>
4853
4854C<\newcommand{\ADDblindtext}{{\color{blue}\blindtext}}>
4855
4856and then latexdiff should be invoked with the option C<-c CUSTOMDIFCMD=blindtext>.
4857
4858[ Default: none ]
4859
4860=item C<FLOATENV>
4861
4862Environments whose name matches the regular expression in C<FLOATENV> are
4863considered floats.  Within these environments, the I<latexdiff> markup commands
4864are replaced by their FL variants.
4865
4866[ Default: S<C<(?:figure|table|plate)[\w\d*@]*> >]
4867
4868=item C<ITEMCMD>
4869
4870Commands representing a new item line within list environments.
4871
4872[ Default: \C<item> ]
4873
4874=item C<LISTENV>
4875
4876Environments whose name matches the regular expression in C<LISTENV> are list environments.
4877
4878[ Default: S<C<(?:itemize|enumerate|description)> >]
4879
4880=item C<MATHENV>,C<MATHREPL>
4881
4882If both \begin and \end for a math environment (environment name matching C<MATHENV> or \[ and \])
4883are within the same deleted block, they are replaced by a \begin and \end commands for C<MATHREPL>
4884rather than being commented out.
4885
4886[ Default: C<MATHENV>=S<C<(?:displaymath|equation)> >, C<MATHREPL>=S<C<displaymath> >]
4887
4888=item C<MATHARRENV>,C<MATHARRREPL>
4889
4890as C<MATHENV>,C<MATHREPL> but for equation arrays
4891
4892[ Default: C<MATHARRENV>=S<C<eqnarray\*?> >, C<MATHARRREPL>=S<C<eqnarray> >]
4893
4894=item C<MINWORDSBLOCK>
4895
4896Minimum number of tokens required to form an independent block. This value is
4897used in the algorithm to detect changes of complete blocks by merging identical text parts of less than C<MINWORDSBLOCK> to the preceding added and discarded parts.
4898
4899[ Default: 3 ]
4900
4901=item C<PICTUREENV>
4902
4903Within environments whose name matches the regular expression in C<PICTUREENV>
4904all latexdiff markup is removed (in pathologic cases this might lead to
4905inconsistent markup but this situation should be rare).
4906
4907[ Default: S<C<(?:picture|DIFnomarkup)[\w\d*@]*> >]
4908
4909=item C<SCALEDELGRAPHICS>
4910
4911If C<--graphics-markup=both> is chosen, C<SCALEDELGRAPHICS> is the factor, by which deleted figures will be scaled (i.e. 0.5 implies they are shown at half linear size).
4912
4913[ Default: 0.5 ]
4914
4915=item C<VERBATIMENV>
4916
4917RegEx describing environments like verbatim, whose contents should be taken verbatim. The content of these environments will not be processed in any way:
4918deleted content is commented out, new content is not marked up
4919
4920[ Default:  S<C<comment> > ]
4921
4922=item C<VERBATIMLINEENV>
4923
4924RegEx describing environments like verbatim, whose contents should be taken verbatim. The contents of environments described by VERBATIMLINEENV are compared in
4925line mode, and changes are marked up using the listings package. The markup style is set based on the chosen main markup type (Option -t), or on an analysis
4926of the preamble.
4927Note that "listings.sty" must be installed. If this file is not found the fallback solution is to
4928treat VERBATIMLINEENV environments exactly the same way as VERBATIMENV environments.
4929
4930[ Default:  S<C<(?:verbatim[*]?|lstlisting)> > ]
4931
4932=back
4933
4934=head1 COMMON PROBLEMS AND FAQ
4935
4936=over 10
4937
4938=item Citations result in overfull boxes
4939
4940There is an incompatibility between the C<ulem> package, which C<latexdiff> uses for underlining and striking out in the UNDERLINE style,
4941the default style, and the way citations are generated. In order to be able to mark up citations properly, they are enclosed with an C<\mbox>
4942command. As mboxes cannot be broken across lines, this procedure frequently results in overfull boxes, possibly obscuring the content as it extends beyond the right margin.  The same occurs for some other packages (e.g., siunitx). If this is a problem, you have two possibilities.
4943
49441. Use C<CFONT> type markup (option C<-t CFONT>): If this markup is chosen, then changed citations are no longer marked up
4945with the wavy line (additions) or struck out (deletions), but are still highlighted in the appropriate color, and deleted text is shown with a different font. Other styles not using the C<ulem> package will also work.
4946
49472. Choose option C<--disable-citation-markup> which turns off the marking up of citations: deleted citations are no longer shown, and
4948added citations are shown without markup. (This was the default behaviour of latexdiff at versions 0.6 and older)
4949
4950For custom packages you can define the commands which need to be protected by C<\mbox> with C<--append-mboxsafecmd> and C<--exclude-mboxsafecmd> options
4951(submit your lists of commands as feature request at github page to set the default behaviour of future versions, see section 6)
4952
4953=item Changes in complicated mathematical equations result in latex processing errors
4954
4955Try option C<--math-markup=whole>.   If even that fails, you can turn off mark up for equations with C<--math-markup=off>.
4956
4957=item How can I just show the pages where changes had been made
4958
4959Use option C<-s ZLABEL> (some postprocessing required) or C<-s ONLYCHANGEDPAGE>. C<latexdiff-vc --ps|--pdf> with the C<--only-changes> option takes care of
4960the post-processing for you (requires zref package to be installed).
4961
4962=back
4963
4964=head1 BUGS
4965
4966=over 10
4967
4968=item Option allow-spaces not implemented entirely consistently. It breaks
4969the rules that number and type of white space does not matter, as
4970different numbers of inter-argument spaces are treated as significant.
4971
4972=back
4973
4974Please submit bug reports using the issue tracker of the github repository page I<https://github.com/ftilmann/latexdiff.git>,
4975or send them to I<tilmann -- AT -- gfz-potsdam.de>.  Include the version number of I<latexdiff>
4976(from comments at the top of the source or use B<--version>).  If you come across latex
4977files that are error-free and conform to the specifications set out
4978above, and whose differencing still does not result in error-free
4979latex, please send me those files, ideally edited to only contain the
4980offending passage as long as that still reproduces the problem. If your
4981file relies on non-standard class files, you must include those.  I will not
4982look at examples where I have trouble to latex the original files.
4983
4984=head1 SEE ALSO
4985
4986L<latexrevise>, L<latexdiff-vc>
4987
4988=head1 PORTABILITY
4989
4990I<latexdiff> does not make use of external commands and thus should run
4991on any platform  supporting Perl 5.6 or higher.  If files with encodings
4992other than ASCII or UTF-8 are processed, Perl 5.8 or higher is required.
4993
4994The standard version of I<latexdiff> requires installation of the Perl package
4995C<Algorithm::Diff> (available from I<www.cpan.org> -
4996I<http://search.cpan.org/~nedkonz/Algorithm-Diff-1.15>) but a stand-alone
4997version, I<latexdiff-so>, which has this package inlined, is available, too.
4998I<latexdiff-fast> requires the I<diff> command to be present.
4999
5000=head1 AUTHOR
5001
5002Version 1.3.1.1
5003Copyright (C) 2004-2018 Frederik Tilmann
5004
5005This program is free software; you can redistribute it and/or modify
5006it under the terms of the GNU General Public License Version 3
5007
5008Contributors of fixes and additions: V. Kuhlmann, J. Paisley, N. Becker, T. Doerges, K. Huebner,
5009T. Connors, Sebastian Gouezel and many others.
5010Thanks to the many people who sent in bug reports, feature suggestions, and other feedback.
5011
5012=cut
5013
5014__END__
5015%%BEGIN SAFE COMMANDS
5016% Regex matching commands which can safely be in the
5017% argument of a \DIFadd or \DIFdel command (leave out the \)
5018arabic
5019dashbox
5020emph
5021fbox
5022framebox
5023hspace\*?
5024math.*
5025makebox
5026mbox
5027pageref
5028ref
5029symbol
5030raisebox
5031rule
5032text.*
5033shortstack
5034usebox
5035dag
5036ddag
5037copyright
5038pounds
5039S
5040P
5041oe
5042OE
5043ae
5044AE
5045aa
5046AA
5047o
5048O
5049l
5050L
5051frac
5052ss
5053sqrt
5054ldots
5055cdots
5056vdots
5057ddots
5058alpha
5059beta
5060gamma
5061delta
5062epsilon
5063varepsilon
5064zeta
5065eta
5066theta
5067vartheta
5068iota
5069kappa
5070lambda
5071mu
5072nu
5073xi
5074pi
5075varpi
5076rho
5077varrho
5078sigma
5079varsigma
5080tau
5081upsilon
5082phi
5083varphi
5084chi
5085psi
5086omega
5087Gamma
5088Delta
5089Theta
5090Lambda
5091Xi
5092Pi
5093Sigma
5094Upsilon
5095Phi
5096Psi
5097Omega
5098p[ms]
5099mp
5100times
5101div
5102ast
5103star
5104circ
5105bullet
5106cdot
5107cap
5108cup
5109uplus
5110sqcap
5111vee
5112wedge
5113setminus
5114wr
5115diamond
5116(?:big)?triangle.*
5117lhd
5118rhd
5119unlhd
5120unrhd
5121oplus
5122ominus
5123otimes
5124oslash
5125odot
5126bigcirc
5127d?dagger
5128amalg
5129leq
5130prec
5131preceq
5132ll
5133(?:sq)?su[bp]set(?:eq)?
5134in
5135vdash
5136geq
5137succ(?:eq)?
5138gg
5139ni
5140dashv
5141equiv
5142sim(?:eq)?
5143asymp
5144approx
5145cong
5146neq
5147doteq
5148propto
5149models
5150perp
5151mid
5152parallel
5153bowtie
5154Join
5155smile
5156frown
5157.*arrow
5158(?:long)?mapsto
5159.*harpoon.*
5160leadsto
5161aleph
5162hbar
5163imath
5164jmath
5165ell
5166wp
5167Re
5168Im
5169mho
5170prime
5171emptyset
5172nabla
5173surd
5174top
5175bot
5176angle
5177forall
5178exists
5179neg
5180flat
5181natural
5182sharp
5183backslash
5184partial
5185infty
5186Box
5187Diamond
5188triangle
5189clubsuit
5190diamondsuit
5191heartsuit
5192spadesuit
5193sum
5194prod
5195coprod
5196int
5197oint
5198big(?:sq)?c[au]p
5199bigvee
5200bigwedge
5201bigodot
5202bigotimes
5203bigoplus
5204biguplus
5205(?:arc)?(?:cos|sin|tan|cot)h?
5206csc
5207arg
5208deg
5209det
5210dim
5211exp
5212gcd
5213hom
5214inf
5215ker
5216lg
5217lim
5218liminf
5219limsup
5220ln
5221log
5222max
5223min
5224Pr
5225sec
5226sup
5227bibfield
5228bibinfo
5229[Hclbkdruvt]
5230[`'^"~=.]
5231_
5232AMPERSAND
5233(SUPER|SUB)SCRIPTNB
5234(SUPER|SUB)SCRIPT
5235SQRT
5236SQRTNB
5237PERCENTAGE
5238DOLLAR
5239%%END SAFE COMMANDS
5240
5241%%BEGIN TEXT COMMANDS
5242% Regex matching commands with a text argument (leave out the \)
5243addcontents.*
5244cc
5245closing
5246chapter
5247dashbox
5248emph
5249encl
5250fbox
5251framebox
5252footnote
5253footnotetext
5254framebox
5255href
5256intertext
5257part
5258(sub){0,2}section\*?
5259(sub)?paragraph\*?
5260makebox
5261mbox
5262opening
5263parbox
5264raisebox
5265savebox
5266sbox
5267shortintertext
5268shortstack
5269sidenote
5270signature
5271text.*
5272value
5273underline
5274sqrt
5275(SUPER|SUB)SCRIPT
5276%%END TEXT COMMANDS
5277
5278%%BEGIN CONTEXT1 COMMANDS
5279% Regex matching commands with a text argument (leave out the \), which will fail out of context. These commands behave like text commands, except when they occur in a deleted section, where they are disabled, but their argument is shown as deleted text.
5280caption
5281subcaption
5282%%END CONTEXT1 COMMANDS
5283
5284%%BEGIN CONTEXT2 COMMANDS
5285% Regex matching commands with a text argument (leave out the \), which will fail out of context.  As corresponding commands for context1.  The only difference is that context2 commands are completely disabled in deleted sections, including their arguments.
5286title
5287author
5288date
5289institute
5290%%END CONTEXT2 COMMANDS
5291
5292%% CONFIGURATION variable defaults
5293%%BEGIN LISTENV CONFIG
5294itemize
5295description
5296enumerate
5297%%END LISTENV CONFIG
5298
5299%%BEGIN FLOATENV CONFIG
5300figure[\w\d*@]*
5301table[\w\d*@]*
5302plate[\w\d*@]*
5303%%END FLOATENV CONFIG
5304
5305%%BEGIN PICTUREENV CONFIG
5306picture[\w\d*@]*
5307tikzpicture[\w\d*@]*
5308DIFnomarkup
5309%%END PICTUREENV CONFIG
5310
5311%%BEGIN MATHENV CONFIG
5312equation[*]?
5313displaymath
5314DOLLARDOLLAR
5315%%END MATHENV CONFIG
5316
5317%%BEGIN MATHARRENV CONFIG
5318eqnarray[*]?
5319align[*]?
5320alignat[*]?
5321gather[*]?
5322multline[*]?
5323flalign[*]?
5324%%END MATHARRENV CONFIG
5325
5326%%BEGIN ARRENV CONFIG
5327aligned
5328gathered
5329array
5330[pbvBV]?matrix
5331smallmatrix
5332cases
5333split
5334%%END ARRENV CONFIG
5335
5336%%BEGIN COUNTERCMD CONFIG
5337footnote
5338part
5339chapter
5340section
5341subsection
5342subsubsection
5343paragraph
5344subparagraph
5345%%END COUNTERCMD CONFIG
5346
5347%%BEGIN VERBATIMENV CONFIG
5348comment
5349%%END VERBATIMENV CONFIG
5350
5351%%BEGIN VERBATIMLINEENV CONFIG
5352lstlisting
5353verbatim[*]?
5354%%END VERBATIMLINEENV CONFIG
5355
5356%%BEGIN CUSTOMDIFCMD CONFIG
5357%%END CUSTOMDIFCMD CONFIG
5358
5359%%% TYPES (Commands for highlighting changed blocks)
5360
5361%DIF UNDERLINE PREAMBLE
5362\RequirePackage[normalem]{ulem}
5363\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
5364\providecommand{\DIFadd}[1]{{\protect\color{blue}\uwave{#1}}}
5365\providecommand{\DIFdel}[1]{{\protect\color{red}\sout{#1}}}
5366%DIF END UNDERLINE PREAMBLE
5367
5368%DIF CTRADITIONAL PREAMBLE
5369\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
5370\RequirePackage[stable]{footmisc}
5371\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
5372\providecommand{\DIFadd}[1]{{\protect\color{blue} \sf #1}}
5373\providecommand{\DIFdel}[1]{{\protect\color{red} [..\footnote{removed: #1} ]}}
5374%DIF END CTRADITIONAL PREAMBLE
5375
5376%DIF TRADITIONAL PREAMBLE
5377\RequirePackage[stable]{footmisc}
5378\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
5379\providecommand{\DIFadd}[1]{{\sf #1}}
5380\providecommand{\DIFdel}[1]{{[..\footnote{removed: #1} ]}}
5381%DIF END TRADITIONAL PREAMBLE
5382
5383%DIF CFONT PREAMBLE
5384\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
5385\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
5386\providecommand{\DIFadd}[1]{{\protect\color{blue} \sf #1}}
5387\providecommand{\DIFdel}[1]{{\protect\color{red} \scriptsize #1}}
5388%DIF END CFONT PREAMBLE
5389
5390%DIF FONTSTRIKE PREAMBLE
5391\RequirePackage[normalem]{ulem}
5392\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
5393\providecommand{\DIFadd}[1]{{\sf #1}}
5394\providecommand{\DIFdel}[1]{{\footnotesize \sout{#1}}}
5395%DIF END FONTSTRIKE PREAMBLE
5396
5397%DIF CCHANGEBAR PREAMBLE
5398\RequirePackage[dvips]{changebar}
5399\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
5400\providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}#1}\protect\cbend}
5401\providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}#1}\protect\cbdelete}
5402%DIF END CCHANGEBAR PREAMBLE
5403
5404%DIF CFONTCHBAR PREAMBLE
5405\RequirePackage[dvips]{changebar}
5406\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
5407\providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}\sf #1}\protect\cbend}
5408\providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}\scriptsize #1}\protect\cbdelete}
5409%DIF END CFONTCHBAR PREAMBLE
5410
5411%DIF CULINECHBAR PREAMBLE
5412\RequirePackage[normalem]{ulem}
5413\RequirePackage[dvips]{changebar}
5414\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
5415\providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}\uwave{#1}}\protect\cbend}
5416\providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}\sout{#1}}\protect\cbdelete}
5417%DIF END CULINECHBAR PREAMBLE
5418
5419%DIF CHANGEBAR PREAMBLE
5420\RequirePackage[dvips]{changebar}
5421\providecommand{\DIFadd}[1]{\protect\cbstart{#1}\protect\cbend}
5422\providecommand{\DIFdel}[1]{\protect\cbdelete}
5423%DIF END CHANGEBAR PREAMBLE
5424
5425%DIF INVISIBLE PREAMBLE
5426\providecommand{\DIFadd}[1]{#1}
5427\providecommand{\DIFdel}[1]{}
5428%DIF END INVISIBLE PREAMBLE
5429
5430%DIF BOLD PREAMBLE
5431\DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf}
5432\providecommand{\DIFadd}[1]{{\bf #1}}
5433\providecommand{\DIFdel}[1]{}
5434%DIF END BOLD PREAMBLE
5435
5436%DIF PDFCOMMENT PREAMBLE
5437\RequirePackage{pdfcomment} %DIF PREAMBLE
5438\providecommand{\DIFadd}[1]{\pdfmarkupcomment[author=ADD:,markup=Underline]{#1}{}}
5439\providecommand{\DIFdel}[1]{\pdfcomment[icon=Insert,author=DEL:,hspace=12pt]{#1}}
5440%DIF END PDFCOMMENT PREAMBLE
5441
5442%% SUBTYPES (Markers for beginning and end of changed blocks)
5443
5444%DIF SAFE PREAMBLE
5445\providecommand{\DIFaddbegin}{}
5446\providecommand{\DIFaddend}{}
5447\providecommand{\DIFdelbegin}{}
5448\providecommand{\DIFdelend}{}
5449\providecommand{\DIFmodbegin}{}
5450\providecommand{\DIFmodend}{}
5451%DIF END SAFE PREAMBLE
5452
5453%DIF MARGIN PREAMBLE
5454\providecommand{\DIFaddbegin}{\protect\marginpar{a[}}
5455\providecommand{\DIFaddend}{\protect\marginpar{]}}
5456\providecommand{\DIFdelbegin}{\protect\marginpar{d[}}
5457\providecommand{\DIFdelend}{\protect\marginpar{]}}
5458\providecommand{\DIFmodbegin}{\protect\marginpar{m[}}
5459\providecommand{\DIFmodend}{\protect\marginpar{]}}
5460%DIF END MARGIN PREAMBLE
5461
5462%DIF DVIPSCOL PREAMBLE
5463%Note: only works with dvips converter
5464\RequirePackage{color}
5465\RequirePackage{dvipscol}
5466\providecommand{\DIFaddbegin}{\protect\nogroupcolor{blue}}
5467\providecommand{\DIFaddend}{\protect\nogroupcolor{black}}
5468\providecommand{\DIFdelbegin}{\protect\nogroupcolor{red}}
5469\providecommand{\DIFdelend}{\protect\nogroupcolor{black}}
5470\providecommand{\DIFmodbegin}{}
5471\providecommand{\DIFmodend}{}
5472%DIF END DVIPSCOL PREAMBLE
5473
5474%DIF COLOR PREAMBLE
5475\RequirePackage{color}
5476\providecommand{\DIFaddbegin}{\protect\color{blue}}
5477\providecommand{\DIFaddend}{\protect\color{black}}
5478\providecommand{\DIFdelbegin}{\protect\color{red}}
5479\providecommand{\DIFdelend}{\protect\color{black}}
5480\providecommand{\DIFmodbegin}{}
5481\providecommand{\DIFmodend}{}
5482%DIF END COLOR PREAMBLE
5483
5484%DIF LABEL PREAMBLE
5485% To show only pages with changes (pdf) (external program pdftk needs to be installed)
5486% (only works for simple documents with non-repeated page numbers, otherwise use ZLABEL)
5487% pdflatex diff.tex
5488% pdflatex diff.tex
5489%pdftk diff.pdf cat \
5490%`perl -lne '\
5491% if (m/\\newlabel{DIFchg[b](\d*)}{{.*}{(.*)}}/) { $start{$1}=$2; print $2}\
5492% if (m/\\newlabel{DIFchg[e](\d*)}{{.*}{(.*)}}/) { \
5493%      if (defined($start{$1})) { \
5494%         for ($j=$start{$1}; $j<=$2; $j++) {print "$j";}\
5495%      } else { \
5496%         print "$2"\
5497%      }\
5498% }' diff.aux \
5499% | uniq \
5500% | tr  \\n ' '` \
5501% output diff-changedpages.pdf
5502% To show only pages with changes (dvips/dvipdf)
5503% dvips -pp `\
5504% [ put here the perl script from above]
5505% | uniq | tr -s \\n ','`
5506\typeout{Check comments in preamble of output for instructions how to show only pages where changes have been made}
5507\newcount\DIFcounterb
5508\global\DIFcounterb 0\relax
5509\newcount\DIFcountere
5510\global\DIFcountere 0\relax
5511\providecommand{\DIFaddbegin}{\global\advance\DIFcounterb 1\relax\label{DIFchgb\the\DIFcounterb}}
5512\providecommand{\DIFaddend}{\global\advance\DIFcountere 1\relax\label{DIFchge\the\DIFcountere}}
5513\providecommand{\DIFdelbegin}{\global\advance\DIFcounterb 1\relax\label{DIFchgb\the\DIFcounterb}}
5514\providecommand{\DIFdelend}{\global\advance\DIFcountere 1\relax\label{DIFchge\the\DIFcountere}}
5515\providecommand{\DIFmodbegin}{\global\advance\DIFcounterb 1\relax\label{DIFchgb\the\DIFcounterb}}
5516\providecommand{\DIFmodend}{\global\advance\DIFcountere 1\relax\label{DIFchge\the\DIFcountere}}
5517%DIF END LABEL PREAMBLE
5518
5519%DIF ZLABEL PREAMBLE
5520% To show only pages with changes (pdf) (external program pdftk needs to be installed)
5521% (uses zref for reference to absolute page numbers)
5522% pdflatex diff.tex
5523% pdflatex diff.tex
5524%pdftk diff.pdf cat \
5525%`perl -lne 'if (m/\\zref\@newlabel{DIFchgb(\d*)}{.*\\abspage{(\d*)}}/ ) { $start{$1}=$2; print $2 } \
5526%  if (m/\\zref\@newlabel{DIFchge(\d*)}{.*\\abspage{(\d*)}}/) { \
5527%      if (defined($start{$1})) { \
5528%         for ($j=$start{$1}; $j<=$2; $j++) {print "$j";}\
5529%      } else { \
5530%         print "$2"\
5531%      }\
5532% }' diff.aux \
5533% | uniq \
5534% | tr  \\n ' '` \
5535% output diff-changedpages.pdf
5536% To show only pages with changes (dvips/dvipdf)
5537% latex diff.tex
5538% latex diff.tex
5539% dvips -pp `perl -lne 'if (m/\\newlabel{DIFchg[be]\d*}{{.*}{(.*)}}/) { print $1 }' diff.aux | uniq | tr -s \\n ','` diff.dvi
5540\typeout{Check comments in preamble of output for instructions how to show only pages where changes have been made}
5541\usepackage[user,abspage]{zref}
5542\newcount\DIFcounterb
5543\global\DIFcounterb 0\relax
5544\newcount\DIFcountere
5545\global\DIFcountere 0\relax
5546\providecommand{\DIFaddbegin}{\global\advance\DIFcounterb 1\relax\zlabel{DIFchgb\the\DIFcounterb}}
5547\providecommand{\DIFaddend}{\global\advance\DIFcountere 1\relax\zlabel{DIFchge\the\DIFcountere}}
5548\providecommand{\DIFdelbegin}{\global\advance\DIFcounterb 1\relax\zlabel{DIFchgb\the\DIFcounterb}}
5549\providecommand{\DIFdelend}{\global\advance\DIFcountere 1\relax\zlabel{DIFchge\the\DIFcountere}}
5550\providecommand{\DIFmodbegin}{\global\advance\DIFcounterb 1\relax\zlabel{DIFchgb\the\DIFcounterb}}
5551\providecommand{\DIFmodend}{\global\advance\DIFcountere 1\relax\zlabel{DIFchge\the\DIFcountere}}
5552%DIF END ZLABEL PREAMBLE
5553
5554%DIF ONLYCHANGEDPAGE PREAMBLE
5555\RequirePackage{atbegshi}
5556\RequirePackage{etoolbox}
5557\RequirePackage{zref}
5558% redefine label command to write immediately to aux file - page references will be lost
5559\makeatletter \let\oldlabel\label% Store \label
5560\renewcommand{\label}[1]{% Update \label to write to the .aux immediately
5561\zref@wrapper@immediate{\oldlabel{#1}}}
5562\makeatother
5563\newbool{DIFkeeppage}
5564\newbool{DIFchange}
5565\boolfalse{DIFkeeppage}
5566\boolfalse{DIFchange}
5567\AtBeginShipout{%
5568  \ifbool{DIFkeeppage}
5569        {\global\boolfalse{DIFkeeppage}}  % True DIFkeeppage
5570         {\ifbool{DIFchange}{\global\boolfalse{DIFkeeppage}}{\global\boolfalse{DIFkeeppage}\AtBeginShipoutDiscard}} % False DIFkeeppage
5571}
5572\providecommand{\DIFaddbegin}{\global\booltrue{DIFkeeppage}\global\booltrue{DIFchange}}
5573\providecommand{\DIFaddend}{\global\booltrue{DIFkeeppage}\global\boolfalse{DIFchange}}
5574\providecommand{\DIFdelbegin}{\global\booltrue{DIFkeeppage}\global\booltrue{DIFchange}}
5575\providecommand{\DIFdelend}{\global\booltrue{DIFkeeppage}\global\boolfalse{DIFchange}}
5576\providecommand{\DIFmodbegin}{\global\booltrue{DIFkeeppage}\global\booltrue{DIFchange}}
5577\providecommand{\DIFmodend}{\global\booltrue{DIFkeeppage}\global\boolfalse{DIFchange}}
5578%DIF END ONLYCHANGEDPAGE PREAMBLE
5579
5580%% FLOAT TYPES
5581
5582%DIF FLOATSAFE PREAMBLE
5583\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
5584\providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
5585\providecommand{\DIFaddbeginFL}{}
5586\providecommand{\DIFaddendFL}{}
5587\providecommand{\DIFdelbeginFL}{}
5588\providecommand{\DIFdelendFL}{}
5589%DIF END FLOATSAFE PREAMBLE
5590
5591%DIF IDENTICAL PREAMBLE
5592\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
5593\providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
5594\providecommand{\DIFaddbeginFL}{\DIFaddbegin}
5595\providecommand{\DIFaddendFL}{\DIFaddend}
5596\providecommand{\DIFdelbeginFL}{\DIFdelbegin}
5597\providecommand{\DIFdelendFL}{\DIFdelend}
5598%DIF END IDENTICAL PREAMBLE
5599
5600%DIF TRADITIONALSAFE PREAMBLE
5601% providecommand color to make this work for TRADITIONAL and CTRADITIONAL
5602\providecommand{\color}[1]{}
5603\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
5604\providecommand{\DIFdelFL}[1]{{\protect\color{red}[..{\scriptsize {removed: #1}} ]}}
5605\providecommand{\DIFaddbeginFL}{}
5606\providecommand{\DIFaddendFL}{}
5607\providecommand{\DIFdelbeginFL}{}
5608\providecommand{\DIFdelendFL}{}
5609%DIF END TRADITIONALSAFE PREAMBLE
5610
5611% see:
5612%  http://tex.stackexchange.com/questions/47351/can-i-redefine-a-command-to-contain-itself
5613
% HIGHLIGHTGRAPHICS: the begin markers of added/deleted blocks rebind
% \includegraphics to a highlighting variant, and the end markers restore the
% original definition.  Added graphics are framed in blue; deleted graphics
% are scaled by \DIFscaledelfig (not defined in this section) and overlaid
% with a red frame and a red diagonal cross.
5614%DIF HIGHLIGHTGRAPHICS PREAMBLE
5615\RequirePackage{settobox}
5616\RequirePackage{letltxmacro}
5617\newsavebox{\DIFdelgraphicsbox}
5618\newlength{\DIFdelgraphicswidth}
5619\newlength{\DIFdelgraphicsheight}
5620% store original definition of \includegraphics
5621\LetLtxMacro{\DIFOincludegraphics}{\includegraphics}
% added graphics: the original image inside a blue \fbox
5622\newcommand{\DIFaddincludegraphics}[2][]{{\color{blue}\fbox{\DIFOincludegraphics[#1]{#2}}}}
% deleted graphics: the image is stored in \DIFdelgraphicsbox, its width and
% total height are measured, and a unit-size picture (red frame plus both
% diagonals) is resized to those dimensions and overlaid with \llap
5623\newcommand{\DIFdelincludegraphics}[2][]{%
5624\sbox{\DIFdelgraphicsbox}{\DIFOincludegraphics[#1]{#2}}%
5625\settoboxwidth{\DIFdelgraphicswidth}{\DIFdelgraphicsbox}
5626\settoboxtotalheight{\DIFdelgraphicsheight}{\DIFdelgraphicsbox}
5627\scalebox{\DIFscaledelfig}{%
5628\parbox[b]{\DIFdelgraphicswidth}{\usebox{\DIFdelgraphicsbox}\\[-\baselineskip] \rule{\DIFdelgraphicswidth}{0em}}\llap{\resizebox{\DIFdelgraphicswidth}{\DIFdelgraphicsheight}{%
5629\setlength{\unitlength}{\DIFdelgraphicswidth}%
5630\begin{picture}(1,1)%
5631\thicklines\linethickness{2pt}
5632{\color[rgb]{1,0,0}\put(0,0){\framebox(1,1){}}}%
5633{\color[rgb]{1,0,0}\put(0,0){\line( 1,1){1}}}%
5634{\color[rgb]{1,0,0}\put(0,1){\line(1,-1){1}}}%
5635\end{picture}%
5636}\hspace*{3pt}}}
5637}
% save the current begin/end markers as \DIFO... and redefine each marker to
% call its own saved version before switching \includegraphics
5638\LetLtxMacro{\DIFOaddbegin}{\DIFaddbegin}
5639\LetLtxMacro{\DIFOaddend}{\DIFaddend}
5640\LetLtxMacro{\DIFOdelbegin}{\DIFdelbegin}
5641\LetLtxMacro{\DIFOdelend}{\DIFdelend}
5642\DeclareRobustCommand{\DIFaddbegin}{\DIFOaddbegin \let\includegraphics\DIFaddincludegraphics}
5643\DeclareRobustCommand{\DIFaddend}{\DIFOaddend \let\includegraphics\DIFOincludegraphics}
5644\DeclareRobustCommand{\DIFdelbegin}{\DIFOdelbegin \let\includegraphics\DIFdelincludegraphics}
% \DIFdelend chains to the saved deletion end marker \DIFOdelend (it used to
% call \DIFOaddend, which dropped any markup specific to \DIFdelend)
5645\DeclareRobustCommand{\DIFdelend}{\DIFOdelend \let\includegraphics\DIFOincludegraphics}
% same wrapping for the float (FL) variants of the markers
5646\LetLtxMacro{\DIFOaddbeginFL}{\DIFaddbeginFL}
5647\LetLtxMacro{\DIFOaddendFL}{\DIFaddendFL}
5648\LetLtxMacro{\DIFOdelbeginFL}{\DIFdelbeginFL}
5649\LetLtxMacro{\DIFOdelendFL}{\DIFdelendFL}
5650\DeclareRobustCommand{\DIFaddbeginFL}{\DIFOaddbeginFL \let\includegraphics\DIFaddincludegraphics}
5651\DeclareRobustCommand{\DIFaddendFL}{\DIFOaddendFL \let\includegraphics\DIFOincludegraphics}
5652\DeclareRobustCommand{\DIFdelbeginFL}{\DIFOdelbeginFL \let\includegraphics\DIFdelincludegraphics}
% \DIFdelendFL chains to the saved \DIFOdelendFL (previously \DIFOaddendFL)
5653\DeclareRobustCommand{\DIFdelendFL}{\DIFOdelendFL \let\includegraphics\DIFOincludegraphics}
5654%DIF END HIGHLIGHTGRAPHICS PREAMBLE
5655
5656%% SPECIAL PACKAGE PREAMBLE COMMANDS
5657
5658% Standard \DIFadd and \DIFdel are redefined as \DIFaddtex and \DIFdeltex
5659% when hyperref package is included.
% \texorpdfstring{<TeX version>}{<PDF string version>}: the typeset text uses
% the marked-up \DIFaddtex / \DIFdeltex form, while the PDF-string version
% contains the plain added text, or nothing at all for deleted text.
% \providecommand only takes effect if \DIFadd / \DIFdel are not yet defined.
5660%DIF HYPERREF PREAMBLE
5661\providecommand{\DIFadd}[1]{\texorpdfstring{\DIFaddtex{#1}}{#1}}
5662\providecommand{\DIFdel}[1]{\texorpdfstring{\DIFdeltex{#1}}{}}
5663%DIF END HYPERREF PREAMBLE
5664
% LISTINGS: defines the listings language DIFcode (delimiters for lines
% starting with "%DIF < " and "%DIF > "), the style DIFverbatimstyle built on
% that language, and the environments DIFverbatim and DIFverbatim*; the
% starred environment additionally sets showspaces=true.
% The two lines tagged DIFCODE TEMPLATE are placeholders; presumably they are
% replaced by the content of the DIFCODE_<type> section for the selected
% markup type.
5665%DIF LISTINGS PREAMBLE
5666\RequirePackage{listings}
5667\RequirePackage{color}
5668\lstdefinelanguage{DIFcode}{
5669  % note that the definitions in the following two lines are overwritten dependent on the markup type selected %DIFCODE TEMPLATE
5670  morecomment=[il]{\%DIF\ <\ },          %DIFCODE TEMPLATE
5671  moredelim=[il][\bfseries]{\%DIF\ >\ }  %DIFCODE TEMPLATE
5672}
5673\lstdefinestyle{DIFverbatimstyle}{
5674	language=DIFcode,
5675	basicstyle=\ttfamily,
5676	columns=fullflexible,
5677	keepspaces=true
5678}
5679\lstnewenvironment{DIFverbatim}{\lstset{style=DIFverbatimstyle}}{}
5680\lstnewenvironment{DIFverbatim*}{\lstset{style=DIFverbatimstyle,showspaces=true}}{}
5681%DIF END LISTINGS PREAMBLE
5682
% Listings delimiters for markup type UNDERLINE: lines starting with
% "%DIF < " are set in red with \sout, lines starting with "%DIF > " in blue
% with \uwave.  [il] = invisible line delimiter (the delimiter itself is not
% printed, the style applies to the rest of the line).
% NOTE(review): \sout and \uwave are not defined in this section - presumably
% supplied by a package loaded in the matching markup preamble; confirm.
5683%DIF DIFCODE_UNDERLINE
5684  moredelim=[il][\color{red}\sout]{\%DIF\ <\ },
5685  moredelim=[il][\color{blue}\uwave]{\%DIF\ >\ }
5686%DIF END DIFCODE_UNDERLINE
5687
% Listings delimiters for markup type CTRADITIONAL: lines starting with
% "%DIF < " are set in red \scriptsize, lines starting with "%DIF > " in blue
% \sffamily.  [il] = invisible line delimiter (the delimiter itself is not
% printed, the style applies to the rest of the line).
5688%DIF DIFCODE_CTRADITIONAL
5689  moredelim=[il][\color{red}\scriptsize]{\%DIF\ <\ },
5690  moredelim=[il][\color{blue}\sffamily]{\%DIF\ >\ }
5691%DIF END DIFCODE_CTRADITIONAL
5692
% Listings delimiters for markup type TRADITIONAL: lines starting with
% "%DIF < " are set in white \tiny (presumably to make them practically
% invisible), lines starting with "%DIF > " in \sffamily.
% [il] = invisible line delimiter (the delimiter itself is not printed, the
% style applies to the rest of the line).
5693%DIF DIFCODE_TRADITIONAL
5694  moredelim=[il][\color{white}\tiny]{\%DIF\ <\ },
5695  moredelim=[il][\sffamily]{\%DIF\ >\ }
5696%DIF END DIFCODE_TRADITIONAL
5697
% Listings delimiters for markup type CFONT: lines starting with "%DIF < "
% are set in red \scriptsize, lines starting with "%DIF > " in blue
% \sffamily.  [il] = invisible line delimiter (the delimiter itself is not
% printed, the style applies to the rest of the line).
5698%DIF DIFCODE_CFONT
5699  moredelim=[il][\color{red}\scriptsize]{\%DIF\ <\ },
5700  moredelim=[il][\color{blue}\sffamily]{\%DIF\ >\ }
5701%DIF END DIFCODE_CFONT
5702
% Listings delimiters for markup type FONTSTRIKE: lines starting with
% "%DIF < " are set in \scriptsize with \sout, lines starting with "%DIF > "
% in \sffamily; no colour is used.  [il] = invisible line delimiter (the
% delimiter itself is not printed, the style applies to the rest of the line).
% NOTE(review): \sout is not defined in this section - presumably supplied by
% a package loaded in the matching markup preamble; confirm.
5703%DIF DIFCODE_FONTSTRIKE
5704  moredelim=[il][\scriptsize \sout]{\%DIF\ <\ },
5705  moredelim=[il][\sffamily]{\%DIF\ >\ }
5706%DIF END DIFCODE_FONTSTRIKE
5707
% Listings delimiters for markup type INVISIBLE: lines starting with
% "%DIF < " are set in white \tiny (presumably to make them practically
% invisible); lines starting with "%DIF > " get no extra style, only the
% delimiter is removed.  [il] = invisible line delimiter.
5708%DIF DIFCODE_INVISIBLE
5709  moredelim=[il][\color{white}\tiny]{\%DIF\ <\ },
5710  moredelim=[il]{\%DIF\ >\ }
5711%DIF END DIFCODE_INVISIBLE
5712
% Listings delimiters for markup type CHANGEBAR: lines starting with
% "%DIF < " are set in white \tiny (presumably to make them practically
% invisible); lines starting with "%DIF > " get no extra style, only the
% delimiter is removed.  [il] = invisible line delimiter.
5713%DIF DIFCODE_CHANGEBAR
5714  moredelim=[il][\color{white}\tiny]{\%DIF\ <\ },
5715  moredelim=[il]{\%DIF\ >\ }
5716%DIF END DIFCODE_CHANGEBAR
5717
% Listings delimiters for markup type CCHANGEBAR: lines starting with
% "%DIF < " are set in red, lines starting with "%DIF > " in blue; fonts are
% left unchanged.  [il] = invisible line delimiter (the delimiter itself is
% not printed, the style applies to the rest of the line).
5718%DIF DIFCODE_CCHANGEBAR
5719  moredelim=[il][\color{red}]{\%DIF\ <\ },
5720  moredelim=[il][\color{blue}]{\%DIF\ >\ }
5721%DIF END DIFCODE_CCHANGEBAR
5722
% Listings delimiters for markup type CULINECHBAR: lines starting with
% "%DIF < " are set in red with \sout, lines starting with "%DIF > " in blue
% with \uwave.  [il] = invisible line delimiter (the delimiter itself is not
% printed, the style applies to the rest of the line).
% NOTE(review): \sout and \uwave are not defined in this section - presumably
% supplied by a package loaded in the matching markup preamble; confirm.
5723%DIF DIFCODE_CULINECHBAR
5724  moredelim=[il][\color{red}\sout]{\%DIF\ <\ },
5725  moredelim=[il][\color{blue}\uwave]{\%DIF\ >\ }
5726%DIF END DIFCODE_CULINECHBAR
5727
% Listings delimiters for markup type CFONTCHBAR: lines starting with
% "%DIF < " are set in red \scriptsize, lines starting with "%DIF > " in blue
% \sffamily.  [il] = invisible line delimiter (the delimiter itself is not
% printed, the style applies to the rest of the line).
5728%DIF DIFCODE_CFONTCHBAR
5729  moredelim=[il][\color{red}\scriptsize]{\%DIF\ <\ },
5730  moredelim=[il][\color{blue}\sffamily]{\%DIF\ >\ }
5731%DIF END DIFCODE_CFONTCHBAR
5732
% Listings delimiters for markup type BOLD: lines starting with "%DIF < " are
% set in white \tiny (the workaround described inside the section), lines
% starting with "%DIF > " in bold sans-serif.
% [il] = invisible line delimiter (the delimiter itself is not printed, the
% style applies to the rest of the line).
5733%DIF DIFCODE_BOLD
5734  % unfortunately \bfseries cannot be combined with ttfamily without extra packages
5735  % also morecomment=[il] is broken as of v1.5b of listings at least
5736  % workaround: plot in white with tiny font
5737  % morecomment=[il]{\%DIF\ <\ },
5738  moredelim=[il][\color{white}\tiny]{\%DIF\ <\ },
5739  moredelim=[il][\sffamily\bfseries]{\%DIF\ >\ }
5740%DIF END DIFCODE_BOLD
5741
% Listings delimiters for markup type PDFCOMMENT: lines starting with
% "%DIF < " are set in white \tiny (presumably to make them practically
% invisible), lines starting with "%DIF > " in bold sans-serif.
% [il] = invisible line delimiter (the delimiter itself is not printed, the
% style applies to the rest of the line).
5742%DIF DIFCODE_PDFCOMMENT
5743
5744  moredelim=[il][\color{white}\tiny]{\%DIF\ <\ },
5745  moredelim=[il][\sffamily\bfseries]{\%DIF\ >\ }
5746%DIF END DIFCODE_PDFCOMMENT
5747
5748