latexdiff

   1 #!/usr/bin/perl -w
   2 # latexdiff - differences two latex files on the word level
   3 #             and produces a latex file with the differences marked up.
   4 #
   5 #   Copyright (C) 2004-2007  F J Tilmann (tilmann@esc.cam.ac.uk)
   6 #
   7 #    This program is free software; you can redistribute it and/or modify
   8 #    it under the terms of the GNU General Public License Version 2 as published by
   9 #    the Free Software Foundation.
  10 #
  11 #    This program is distributed in the hope that it will be useful,
  12 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 #    GNU General Public License for more details.
  15 #
  16 #    You should have received a copy of the GNU General Public License
  17 #    along with this program; if not, write to the Free Software
  18 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19 #
  20 # Detailed usage information at the end of the file
  21 #
  22 # Version 0.5  A number of minor improvements based on feedback
  23 #              Deleted blocks are now shown before added blocks
  24 #              Package specific processing
  25 #
  26 # Version 0.43 unreleased typo in list of styles at the end
  27 #              Add protect to all \cbstart, \cbend commands
  28 #              More robust substitution of deleted math commands
  29 #
  30 # Version 0.42 November 06  Bug fixes only
  31 #
  32 # Version 0.4   March 06 option for fast differencing using UNIX diff command, several minor bug fixes (\par bug, improved highlighting of textcmds)
  33 #
  34 # Version 0.3   August 05 improved parsing of displayed math, --allow-spaces
  35 #               option, several minor bug fixes
  36 #
  37 # Version 0.25  October 04 Fix bug with deleted equations, add math mode commands to safecmd, add | to allowed interpunctuation signs
  38 # Version 0.2   September 04 extension to utf-8 and variable encodings
  39 # Version 0.1   August 04    First public release
  40
  41 # Inserted block for differencing
  42 # use Algorithm::Diff qw(traverse_sequences);
  43 # in standard version
  44 # The following BEGIN block contains a verbatim copy of
  45 # Ned Konz' Algorithm::Diff package version 1.15 except
  46 # that subroutine _longestCommonSubsequence has been replaced by
  47 # a routine which internally uses the UNIX diff command for
  48 # the differencing rather than the Perl routines if the
  49 # length of the sequences exceeds some threshold.
  50 # Also, all POD documentation has been stripped out.
  51 #
  52 # (the distribution on which this modification is based is available
  53 #  from http://search.cpan.org/~nedkonz/Algorithm-Diff-1.15
  54 #  the most recent version can be found via  http://search.cpan.org/search?module=Algorithm::Diff )
  55 # Please note the LICENSE for Algorithm::Diff:
  56 #   "Copyright (c) 2000-2002 Ned Konz.  All rights reserved.
  57 #    This program is free software;
  58 #    you can redistribute it and/or modify it under the same terms
  59 #    as Perl itself."
  60 # The fast-differencing version of latexdiff is provided as a convenience
  61 # for latex users under Unix-like systems which have a 'diff' command.
  62 # If you believe
  63 # the inlining of Algorithm::Diff violates its license please contact
  64 # me and I will modify the latexdiff distribution accordingly.
  65 # Frederik Tilmann (tilmann@esc.cam.ac.uk)
  66 # Jonathan Paisley is acknowledged for the idea of using the system diff
  67 # command to achieve shorter running times
  68 BEGIN {
  69 package Algorithm::Diff;
  70 use strict;
  71 use vars qw($VERSION @EXPORT_OK @ISA @EXPORT);
  72 use integer;    # see below in _replaceNextLargerWith() for mod to make
  73                 # if you don't use this
  74 require Exporter;
  75 @ISA       = qw(Exporter);
  76 @EXPORT    = qw();
  77 @EXPORT_OK = qw(LCS diff traverse_sequences traverse_balanced sdiff);
  78 $VERSION = sprintf('%d.%02d fast', (q$Revision: 1.15 $ =~ /\d+/g));
  79
  80 # Global parameters
  81
  82 use File::Temp qw/tempfile/;
  83 # If the last index of the longer sequence passed to _longestCommonSubsequence
  84 # is smaller than this number, use the internal algorithm; otherwise use UNIX diff
  85 use constant THRESHOLD => 100 ;
  86 # Detect whether diff --minimal option is available
  87 # if yes we use it
  88 use constant MINIMAL => ( system('diff','--minimal','/dev/null','/dev/null') >> 8 ==0 ? "--minimal" : "" ) ;
  89
  90
  91
  92 # McIlroy-Hunt diff algorithm
  93 # Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
  94 # by Ned Konz, perl@bike-nomad.com
  95
  96
  97 # Create a hash that maps each element of $aCollection to the set of positions
  98 # it occupies in $aCollection, restricted to the elements within the range of
  99 # indexes specified by $start and $end.
 100 # The fourth parameter is a subroutine reference that will be called to
 101 # generate a string to use as a key.
 102 # Additional parameters, if any, will be passed to this subroutine.
 103 #
 104 # my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
 105
 106 sub _withPositionsOfInInterval
 107 {
 108         my $aCollection = shift;    # array ref
 109         my $start       = shift;
 110         my $end         = shift;
 111         my $keyGen      = shift;
 112         my %d;
 113         my $index;
 114         for ( $index = $start ; $index <= $end ; $index++ )
 115         {
 116                 my $element = $aCollection->[$index];
 117                 my $key = &$keyGen( $element, @_ );
 118                 if ( exists( $d{$key} ) )
 119                 {
 120                         unshift ( @{ $d{$key} }, $index );
 121                 }
 122                 else
 123                 {
 124                         $d{$key} = [$index];
 125                 }
 126         }
 127         return wantarray ? %d : \%d;
 128 }
 129
 130 # Find the place at which aValue would normally be inserted into the array. If
 131 # that place is already occupied by aValue, do nothing, and return undef. If
 132 # the place does not exist (i.e., it is off the end of the array), add it to
 133 # the end, otherwise replace the element at that point with aValue.
 134 # It is assumed that the array's values are numeric.
 135 # This is where the bulk (75%) of the time is spent in this module, so try to
 136 # make it fast!
 137
 138 sub _replaceNextLargerWith
 139 {
 140         my ( $array, $aValue, $high ) = @_;
 141         $high ||= $#$array;
 142
 143         # off the end?
 144         if ( $high == -1 || $aValue > $array->[-1] )
 145         {
 146                 push ( @$array, $aValue );
 147                 return $high + 1;
 148         }
 149
 150         # binary search for insertion point...
 151         my $low = 0;
 152         my $index;
 153         my $found;
 154         while ( $low <= $high )
 155         {
 156                 $index = ( $high + $low ) / 2;
 157
 158                 #               $index = int(( $high + $low ) / 2);             # without 'use integer'
 159                 $found = $array->[$index];
 160
 161                 if ( $aValue == $found )
 162                 {
 163                         return undef;
 164                 }
 165                 elsif ( $aValue > $found )
 166                 {
 167                         $low = $index + 1;
 168                 }
 169                 else
 170                 {
 171                         $high = $index - 1;
 172                 }
 173         }
 174
 175         # now insertion point is in $low.
 176         $array->[$low] = $aValue;    # overwrite next larger
 177         return $low;
 178 }
 179
 180 # This method computes the longest common subsequence in $a and $b.
 181
 182 # Result is array or ref, whose contents is such that
 183 #       $a->[ $i ] == $b->[ $result[ $i ] ]
 184 # foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
 185
 186 # An additional argument may be passed; this is a hash or key generating
 187 # function that should return a string that uniquely identifies the given
 188 # element.  It should be the case that if the key is the same, the elements
 189 # will compare the same. If this parameter is undef or missing, the key
 190 # will be the element as a string.
 191
 192 # By default, comparisons will use "eq" and elements will be turned into keys
 193 # using the default stringizing operator '""'.
 194
 195 # Additional parameters, if any, will be passed to the key generation routine.
 196
 197 sub _longestCommonSubsequence
 198 {
 199         my $a      = shift;    # array ref
 200         my $b      = shift;    # array ref
 201         my $keyGen = shift;    # code ref
 202         my $compare;           # code ref
 203
 204         # set up code refs
 205         # Note that these are optimized.
 206         if ( !defined($keyGen) )    # optimize for strings
 207         {
 208                 $keyGen = sub { $_[0] };
 209                 $compare = sub { my ( $a, $b ) = @_; $a eq $b };
 210         }
 211         else
 212         {
 213                 $compare = sub {
 214                         my $a = shift;
 215                         my $b = shift;
 216                         &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
 217                 };
 218         }
 219
 220         my ( $aStart, $aFinish, $bStart, $bFinish, $matchVector ) =
 221           ( 0, $#$a, 0, $#$b, [] );
 222
 223         # Check whether to use internal routine (small number of elements)
 224         # or use it as a wrapper for UNIX diff
 225         if ( ( $#$a > $#$b ?  $#$a : $#$b) < THRESHOLD ) {
 226           ###     print STDERR "DEBUG: regular longestCommonSubsequence\n";
 227           # First we prune off any common elements at the beginning
 228           while ( $aStart <= $aFinish
 229                   and $bStart <= $bFinish
 230                   and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
 231             {
 232               $matchVector->[ $aStart++ ] = $bStart++;
 233             }
 234
 235           # now the end
 236           while ( $aStart <= $aFinish
 237                 and $bStart <= $bFinish
 238                 and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
 239             {
 240               $matchVector->[ $aFinish-- ] = $bFinish--;
 241             }
 242
 243           # Now compute the equivalence classes of positions of elements
 244           my $bMatches =
 245             _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
 246           my $thresh = [];
 247           my $links  = [];
 248
 249           my ( $i, $ai, $j, $k );
 250           for ( $i = $aStart ; $i <= $aFinish ; $i++ )
 251             {
 252               $ai = &$keyGen( $a->[$i], @_ );
 253               if ( exists( $bMatches->{$ai} ) )
 254                 {
 255                   $k = 0;
 256                   for $j ( @{ $bMatches->{$ai} } )
 257                     {
 258
 259                       # optimization: most of the time this will be true
 260                       if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
 261                         {
 262                           $thresh->[$k] = $j;
 263                         }
 264                       else
 265                         {
 266                           $k = _replaceNextLargerWith( $thresh, $j, $k );
 267                         }
 268
 269                       # oddly, it's faster to always test this (CPU cache?).
 270                       if ( defined($k) )
 271                         {
 272                           $links->[$k] =
 273                             [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
 274                         }
 275                     }
 276                 }
 277             }
 278
 279           if (@$thresh)
 280             {
 281               for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
 282                 {
 283                   $matchVector->[ $link->[1] ] = $link->[2];
 284                 }
 285             }
 286         }
 287         else {
 288           my ($fha,$fhb,$fna,$fnb,$ele,$key);
 289           my ($alines,$blines,$alb,$alf,$blb,$blf);
 290           my ($minimal)=MINIMAL;
 291           # large number of elements, use system diff
 292           ###     print STDERR "DEBUG: fast (diff) longestCommonSubsequence\n";
 293
 294           ($fha,$fna)=tempfile("DiffA-XXXX") or die "_longestCommonSubsequence: Cannot open tempfile for sequence A";
 295           ($fhb,$fnb)=tempfile("DiffB-XXXX") or die "_longestCommonSubsequence: Cannot open tempfile for sequence B";
 296           # prepare sequence A
 297           foreach $ele ( @$a ) {
 298             $key=&$keyGen( $ele, @_ );
 299             $key =~ s/\\/\\\\/g ;
 300             $key =~ s/\n/\\n/sg ;
 301             print $fha "$key\n" ;
 302           }
 303           close($fha);
 304           # prepare sequence B
 305           foreach $ele ( @$b ) {
 306             $key=&$keyGen( $ele, @_ );
 307             $key =~ s/\\/\\\\/g ;
 308             $key =~ s/\n/\\n/sg ;
 309             print $fhb "$key\n" ;
 310           }
 311           close($fhb);
 312
 313           open(DIFFPIPE, "diff $minimal $fna $fnb |") or die "_longestCommonSubsequence: Cannot launch diff process. $!" ;
 314           # The diff line numbering begins with 1, but Perl subscripts start with 0
 315           # We follow the diff numbering but substract 1 when assigning to matchVector
 316           $aStart++; $bStart++ ; $aFinish++ ; $bFinish++ ;
 317           while( <DIFFPIPE> ) {
 318             if ( ($alines,$blines) = ( m/^(\d*(?:,\d*)?)?c(\d*(?:,\d*)?)?$/ ) ) {
 319               ($alb,$alf)=split(/,/,$alines);
 320               ($blb,$blf)=split(/,/,$blines);
 321               $alf=$alb unless defined($alf);
 322               $blf=$blb unless defined($blf);
 323               while($aStart < $alb ) {
 324                 $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
 325               }
 326               # check for consistency
 327               $bStart==$blb or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in changed sequence";
 328               $aStart=$alf+1;
 329               $bStart=$blf+1;
 330             }
 331             elsif ( ($alb,$blines) = ( m/^(\d*)a(\d*(?:,\d*)?)$/ ) ) {
 332               ($blb,$blf)=split(/,/,$blines);
 333               $blf=$blb unless defined($blf);
 334               while ( $bStart < $blb ) {
 335                 $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
 336               }
 337               $aStart==$alb+1 or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in appended sequence near elements $aStart and $bStart";
 338               $bStart=$blf+1;
 339             }
 340             elsif ( ($alines,$blb) = ( m/^(\d*(?:,\d*)?)d(\d*)$/ ) ) {
 341               ($alb,$alf)=split(/,/,$alines);
 342               $alf=$alb unless defined($alf);
 343               while ( $aStart < $alb ) {
 344                 $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
 345               }
 346               $bStart==$blb+1 or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency in deleted sequence near elements $aStart and $bStart";
 347               $aStart=$alf+1;
 348             }
 349             elsif ( m/^Binary files/ ) {
 350               # if diff reports it is a binary file force --text mode. I do not like
 351               # to always use this option because it is probably only available in GNU diff
 352               open(DIFFPIPE, "diff --text $fna $fnb |") or die "Cannot launch diff process. $!" ;
 353             }
 354             # Default: just skip line
 355           }
 356           while ($aStart <= $aFinish ) {
 357             $matchVector->[ -1 + $aStart++ ] = -1 + $bStart++ ;
 358           }
 359           $bStart==$bFinish+1  or die "_longestCommonSubsequence: Fatal error in interpreting diff output: Inconsistency at end";
 360           close DIFFPIPE;
 361           # check whether a system error has occurred or return status is greater than or equal to 5
 362           if ( $! || ($? >> 8) > 5) {
 363             print STDERR "diff process failed with exit code ", ($? >> 8), " $!\n";
 364             die;
 365           }
 366           unlink $fna,$fnb ;
 367         }
 368         return wantarray ? @$matchVector : $matchVector;
 369 }
 370
 371 sub traverse_sequences
 372 {
 373         my $a                 = shift;                                  # array ref
 374         my $b                 = shift;                                  # array ref
 375         my $callbacks         = shift || {};
 376         my $keyGen            = shift;
 377         my $matchCallback     = $callbacks->{'MATCH'} || sub { };
 378         my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
 379         my $finishedACallback = $callbacks->{'A_FINISHED'};
 380         my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
 381         my $finishedBCallback = $callbacks->{'B_FINISHED'};
 382         my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );
 383
 384         # Process all the lines in @$matchVector
 385         my $lastA = $#$a;
 386         my $lastB = $#$b;
 387         my $bi    = 0;
 388         my $ai;
 389
 390         for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
 391         {
 392                 my $bLine = $matchVector->[$ai];
 393                 if ( defined($bLine) )    # matched
 394                 {
 395                         &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
 396                         &$matchCallback( $ai,    $bi++, @_ );
 397                 }
 398                 else
 399                 {
 400                         &$discardACallback( $ai, $bi, @_ );
 401                 }
 402         }
 403
 404         # The last entry (if any) processed was a match.
 405         # $ai and $bi point just past the last matching lines in their sequences.
 406
 407         while ( $ai <= $lastA or $bi <= $lastB )
 408         {
 409
 410                 # last A?
 411                 if ( $ai == $lastA + 1 and $bi <= $lastB )
 412                 {
 413                         if ( defined($finishedACallback) )
 414                         {
 415                                 &$finishedACallback( $lastA, @_ );
 416                                 $finishedACallback = undef;
 417                         }
 418                         else
 419                         {
 420                                 &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
 421                         }
 422                 }
 423
 424                 # last B?
 425                 if ( $bi == $lastB + 1 and $ai <= $lastA )
 426                 {
 427                         if ( defined($finishedBCallback) )
 428                         {
 429                                 &$finishedBCallback( $lastB, @_ );
 430                                 $finishedBCallback = undef;
 431                         }
 432                         else
 433                         {
 434                                 &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
 435                         }
 436                 }
 437
 438                 &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
 439                 &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
 440         }
 441
 442         return 1;
 443 }
 444
 445 sub traverse_balanced
 446 {
 447         my $a                 = shift;                                  # array ref
 448         my $b                 = shift;                                  # array ref
 449         my $callbacks         = shift || {};
 450         my $keyGen            = shift;
 451         my $matchCallback     = $callbacks->{'MATCH'} || sub { };
 452         my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
 453         my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
 454         my $changeCallback    = $callbacks->{'CHANGE'};
 455         my $matchVector = _longestCommonSubsequence( $a, $b, $keyGen, @_ );
 456
 457         # Process all the lines in match vector
 458         my $lastA = $#$a;
 459         my $lastB = $#$b;
 460         my $bi    = 0;
 461         my $ai    = 0;
 462         my $ma    = -1;
 463         my $mb;
 464
 465         while (1)
 466         {
 467
 468                 # Find next match indices $ma and $mb
 469                 do { $ma++ } while ( $ma <= $#$matchVector && !defined $matchVector->[$ma] );
 470
 471                 last if $ma > $#$matchVector;    # end of matchVector?
 472                 $mb = $matchVector->[$ma];
 473
 474                 # Proceed with discard a/b or change events until
 475                 # next match
 476                 while ( $ai < $ma || $bi < $mb )
 477                 {
 478
 479                         if ( $ai < $ma && $bi < $mb )
 480                         {
 481
 482                                 # Change
 483                                 if ( defined $changeCallback )
 484                                 {
 485                                         &$changeCallback( $ai++, $bi++, @_ );
 486                                 }
 487                                 else
 488                                 {
 489                                         &$discardACallback( $ai++, $bi, @_ );
 490                                         &$discardBCallback( $ai, $bi++, @_ );
 491                                 }
 492                         }
 493                         elsif ( $ai < $ma )
 494                         {
 495                                 &$discardACallback( $ai++, $bi, @_ );
 496                         }
 497                         else
 498                         {
 499
 500                                 # $bi < $mb
 501                                 &$discardBCallback( $ai, $bi++, @_ );
 502                         }
 503                 }
 504
 505                 # Match
 506                 &$matchCallback( $ai++, $bi++, @_ );
 507         }
 508
 509         while ( $ai <= $lastA || $bi <= $lastB )
 510         {
 511                 if ( $ai <= $lastA && $bi <= $lastB )
 512                 {
 513
 514                         # Change
 515                         if ( defined $changeCallback )
 516                         {
 517                                 &$changeCallback( $ai++, $bi++, @_ );
 518                         }
 519                         else
 520                         {
 521                                 &$discardACallback( $ai++, $bi, @_ );
 522                                 &$discardBCallback( $ai, $bi++, @_ );
 523                         }
 524                 }
 525                 elsif ( $ai <= $lastA )
 526                 {
 527                         &$discardACallback( $ai++, $bi, @_ );
 528                 }
 529                 else
 530                 {
 531
 532                         # $bi <= $lastB
 533                         &$discardBCallback( $ai, $bi++, @_ );
 534                 }
 535         }
 536
 537         return 1;
 538 }
 539
 540 sub LCS
 541 {
 542         my $a = shift;                                           # array ref
 543         my $matchVector = _longestCommonSubsequence( $a, @_ );
 544         my @retval;
 545         my $i;
 546         for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
 547         {
 548                 if ( defined( $matchVector->[$i] ) )
 549                 {
 550                         push ( @retval, $a->[$i] );
 551                 }
 552         }
 553         return wantarray ? @retval : \@retval;
 554 }
 555
 556 sub diff
 557 {
 558         my $a      = shift;    # array ref
 559         my $b      = shift;    # array ref
 560         my $retval = [];
 561         my $hunk   = [];
 562         my $discard = sub { push ( @$hunk, [ '-', $_[0], $a->[ $_[0] ] ] ) };
 563         my $add = sub { push ( @$hunk, [ '+', $_[1], $b->[ $_[1] ] ] ) };
 564         my $match = sub { push ( @$retval, $hunk ) if scalar(@$hunk); $hunk = [] };
 565         traverse_sequences( $a, $b,
 566                 { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
 567         &$match();
 568         return wantarray ? @$retval : $retval;
 569 }
 570
 571 sub sdiff
 572 {
 573         my $a      = shift;    # array ref
 574         my $b      = shift;    # array ref
 575         my $retval = [];
 576         my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
 577         my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
 578         my $change = sub {
 579                 push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
 580         };
 581         my $match = sub {
 582                 push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
 583         };
 584         traverse_balanced(
 585                 $a,
 586                 $b,
 587                 {
 588                         MATCH     => $match,
 589                         DISCARD_A => $discard,
 590                         DISCARD_B => $add,
 591                         CHANGE    => $change,
 592                 },
 593                 @_
 594         );
 595         return wantarray ? @$retval : $retval;
 596 }
 597
 598 1;
 599 }
 600 import Algorithm::Diff qw(traverse_sequences);
 601 # End of inserted block for stand-alone version
 602
 603
 604 use Getopt::Long ;
 605 use strict ;
 606 use utf8 ;
 607
 608 my ($algodiffversion)=split(/ /,$Algorithm::Diff::VERSION);
 609
 610
 611 my ($versionstring)=<<EOF ;
 612 This is LATEXDIFF 0.5  (Algorithm::Diff $Algorithm::Diff::VERSION)
 613   (c) 2004-2007 F J Tilmann
 614 EOF
 615
 616 # Configuration variables: these have to be visible from the subroutines
 617 my $MINWORDSBLOCK=3; # minimum number of tokens to form an independent block
 618                      # shorter identical blocks will be merged to the previous word
 619 my $FLOATENV='(?:figure|table|plate)[\w\d*@]*' ;   # Environments in which FL variants of defined commands are used
 620 my $PICTUREENV='(?:picture|DIFnomarkup)[\w\d*@]*' ;   # Environments in which all change markup is removed
 621 my $MATHENV='(?:equation|displaymath|DOLLARDOLLAR)' ;           # Environments turning on display math mode (code also knows about \[ and \]
 622 my $MATHREPL='displaymath';  # Environment introducing deleted maths blocks
 623 my $MATHARRENV='(?:eqnarray|align|gather|multiline|flalign)[*]?' ;           # Environments turning on eqnarray math mode
 624 my $MATHARRREPL='eqnarray*';  # Environment introducing deleted maths blocks
 625 my $ARRENV='(?:array|[pbvBV]matrix)'; # Enviroments making arrays in math mode.  The underlining style does not cope well with those - as a result in-text math environments are surrounded by \mbox{ } if any of these commands is used in an inline math block
 626 my $COUNTERCMD='(?:footnote|part|section|subsection|subsubsection|paragraph|subparagraph)';  # textcmds which are associated with a counter
 627                                         # If any of these commands occur in a deleted block
 628                                         # they will be succeeded by an \addtocounter{...}{-1}
 629                                         # for the associated counter such that the overall numbers
 630                                         # should be the same as in the new file
 631
 632 # Markup strings
 633 # If at all possible, do not change these as parts of the program
 634 # depend on the actual name (particularly post-processing)
 635 # At the very least adapt subroutine postprocess to new tokens.
 636 my $ADDMARKOPEN='\DIFaddbegin ';   # Token to mark begin of appended text
 637 my $ADDMARKCLOSE='\DIFaddend ';   # Token to mark end of appended text
 638 my $ADDOPEN='\DIFadd{';  # To mark begin of added text passage
 639 my $ADDCLOSE='}';        # To mark end of added text passage
 640 my $ADDCOMMENT='DIF > ';   # To mark added comment line
 641 my $DELMARKOPEN='\DIFdelbegin ';   # Token to mark begin of deleted text
 642 my $DELMARKCLOSE='\DIFdelend ';   # Token to mark end of deleted text
 643 my $DELOPEN='\DIFdel{';  # To mark begin of deleted text passage
 644 my $DELCLOSE='}';        # To mark end of deleted text passage
 645 my $DELCMDOPEN='%DIFDELCMD < ';  # To mark begin of deleted commands (must begin with %, i.e., be a comment
 646 my $DELCMDCLOSE="%%%\n";    # To mark end of deleted commands (must end with a new line)
 647 my $AUXCMD='%DIFAUXCMD' ; #  follows auxiliary commands put in by latexdiff to make difference file legal
 648                           # auxiliary commands must be on a line of their own
 649 my $DELCOMMENT='DIF < ';   # To mark deleted comment line
 650
 651
 652 # main local variables:
 653 my @TEXTCMDLIST=();  # array containing patterns of commands with text arguments
 654 my @TEXTCMDEXCL=();  # array containing patterns of commands without text arguments (if a pattern
 655                      # matches both TEXTCMDLIST and TEXTCMDEXCL it is excluded)
 656 my @CONTEXT1CMDLIST=();  # array containing patterns of commands with text arguments (subset of text commands),
 657                          # but which cause confusion if used out of context (e.g. \caption).
 658                          # In deleted passages, the command will be disabled but its argument is marked up
 659                          # Otherwise they behave exactly like TEXTCMD's
 660 my @CONTEXT1CMDEXCL=();  # exclude list for above, but always empty
 661 my @CONTEXT2CMDLIST=();  # array containing patterns of commands with text arguments, but which fail cause confusion
 662                          # if used out of context (e.g. \title). They and their arguments will be disabled in deleted
 663                          # passages
 664 my @CONTEXT2CMDEXCL=();  # exclude list for above, but always empty
 665
 666 my @SAFECMDLIST=();  # array containing patterns of safe commands (which do not break when in the argument of DIFadd or DIFDEL)
 667 my @SAFECMDEXCL=();
 668
 669 my ($i,$j,$l);
 670 my ($old,$new);
 671 my ($line);
 672 my ($newpreamble,$oldpreamble);
 673 my (@newpreamble,@oldpreamble,@diffpreamble,@diffbody);
 674 my ($latexdiffpreamble);
 675 my ($oldbody, $newbody, $diffbo);
 676 my ($oldpost, $newpost);
 677 my ($diffall);
 678 # Option names
 679 my ($type,$subtype,$floattype,$config,$preamblefile,$encoding,
 680     $showpreamble,$showsafe,$showtext,$showconfig,$showall,
 681     $replacesafe,$appendsafe,$excludesafe,
 682     $replacetext,$appendtext,$excludetext,
 683     $replacecontext1,$appendcontext1,
 684     $replacecontext2,$appendcontext2,
 685     $help,$verbose,$version,$ignorewarnings,$allowspaces,$flatten,$debug);
 686 my (@configlist,
 687     @appendsafelist,@excludesafelist,
 688     @appendtextlist,@excludetextlist,
 689     @appendcontext1list,@appendcontext2list,
 690     @packagelist);
 691 my ($assign,@config);
 692 # Hash whose keys correspond to the names of all included packages (including the documentclass as another package);
 693 # the optional arguments to the package are the values of the hash elements
 694 my ($pkg,%packages);
 695 # Defaults
 696 $type='UNDERLINE';
 697 $subtype='SAFE';
 698 $floattype='FLOATSAFE';
 699
 700 $verbose=0;
 701 $debug=1;
 702 # define character properties
 703 sub IsNonAsciiPunct { return <<'END'    # Unicode punctuation but excluding ASCII punctuation
 704 +utf8::IsPunct
 705 -utf8::IsASCII
 706 END
 707 }
 708 sub IsNonAsciiS { return <<'END'       # Unicode symbol but excluding ASCII
 709 +utf8::IsS
 710 -utf8::IsASCII
 711 END
 712 }
 713
 714
 715
  # NOTE(review): %verbhash is not referenced in the code between this declaration and
  # the end of the main program; presumably it is used by subroutines further down - confirm
  my %verbhash;

# Parse the command line.  'bundling' allows single-letter options to be combined.
# Options taking a value ('=s') which are bound to an array may be repeated.
Getopt::Long::Configure('bundling');
GetOptions('type|t=s' => \$type,
           'subtype|s=s' => \$subtype,
           'floattype|f=s' => \$floattype,
           'config|c=s' => \@configlist,
           'preamble|p=s' => \$preamblefile,
           'encoding|e=s' => \$encoding,
           'exclude-safecmd|A=s' => \@excludesafelist,
           'replace-safecmd=s' => \$replacesafe,
           'append-safecmd|a=s' => \@appendsafelist,
           'exclude-textcmd|X=s' => \@excludetextlist,
           'replace-textcmd=s' => \$replacetext,
           'append-textcmd|x=s' => \@appendtextlist,
           'replace-context1cmd=s' => \$replacecontext1,
           'append-context1cmd=s' => \@appendcontext1list,
           'replace-context2cmd=s' => \$replacecontext2,
           'append-context2cmd=s' => \@appendcontext2list,
           'show-preamble' => \$showpreamble,
           'show-safecmd' => \$showsafe,
           'show-textcmd' => \$showtext,
           'show-config' => \$showconfig,
           'show-all' => \$showall,
           'packages=s' => \@packagelist,
           'verbose|V' => \$verbose,
           'ignore-warnings' => \$ignorewarnings,
           'allow-spaces' => \$allowspaces,
           'flatten' => \$flatten,
           'version' => \$version,
           'help|h|H' => \$help);

if ( $help ) {
  usage() ;
}


# --version: the version string is reported through die, i.e. on STDERR with a non-zero exit status
if ( $version ) {
  die $versionstring ;
}

print STDERR $versionstring if $verbose;

# --show-all is a shorthand for all the individual --show-* options
if (defined($showall)){
  $showpreamble=$showsafe=$showtext=$showconfig=1;
}

# setting extra preamble commands: either taken from the file given with --preamble
# or assembled from the selected type, subtype and floattype
if (defined($preamblefile)) {
  $latexdiffpreamble=join "\n",(extrapream($preamblefile),"");
} else {
  $latexdiffpreamble=join "\n",(extrapream($type,$subtype,$floattype),"");
}
 769
 770 # setting up @SAFECMDLIST and @SAFECMDEXCL
 771 if (defined($replacesafe)) {
 772   init_regex_arr_ext(\@SAFECMDLIST,$replacesafe);
 773 } else {
 774   init_regex_arr_data(\@SAFECMDLIST, "SAFE COMMANDS");
 775 }
 776 foreach $appendsafe ( @appendsafelist ) {
 777   init_regex_arr_ext(\@SAFECMDLIST, $appendsafe);
 778 }
 779 foreach $excludesafe ( @excludesafelist ) {
 780   init_regex_arr_ext(\@SAFECMDEXCL, $excludesafe);
 781 }
 782
 783 # Special: treat all cite commands as safe except in UNDERLINE and FONTSTRIKE mode
 784 # (there is a conflict between citation and ulem package, see
 785 # package documentation)
 786 if ( uc($type) ne "UNDERLINE" && uc($type) ne "FONTSTRIKE" && uc($type) ne "CULINECHBAR" ) {
 787   push (@SAFECMDLIST, qr/^cite.*$/);
 788 }
 789
 790 # setting up @TEXTCMDLIST and @TEXTCMDEXCL
 791 if (defined($replacetext)) {
 792   init_regex_arr_ext(\@TEXTCMDLIST,$replacetext);
 793 } else {
 794   init_regex_arr_data(\@TEXTCMDLIST, "TEXT COMMANDS");
 795 }
 796 foreach $appendtext ( @appendtextlist ) {
 797   init_regex_arr_ext(\@TEXTCMDLIST, $appendtext);
 798 }
 799 foreach $excludetext ( @excludetextlist ) {
 800   init_regex_arr_ext(\@TEXTCMDEXCL, $excludetext);
 801 }
 802
 803
 804 # setting up @CONTEXT1CMDLIST ( @CONTEXT1CMDEXCL exist but is always empty )
 805 if (defined($replacecontext1)) {
 806   init_regex_arr_ext(\@CONTEXT1CMDLIST,$replacecontext1);
 807 } else {
 808   init_regex_arr_data(\@CONTEXT1CMDLIST, "CONTEXT1 COMMANDS");
 809 }
 810 foreach $appendcontext1 ( @appendcontext1list ) {
 811   init_regex_arr_ext(\@CONTEXT1CMDLIST, $appendcontext1);
 812 }
 813
 814
 815 # setting up @CONTEXT2CMDLIST ( @CONTEXT2CMDEXCL exist but is always empty )
 816 if (defined($replacecontext2)) {
 817   init_regex_arr_ext(\@CONTEXT2CMDLIST,$replacecontext2);
 818 } else {
 819   init_regex_arr_data(\@CONTEXT2CMDLIST, "CONTEXT2 COMMANDS");
 820 }
 821 foreach $appendcontext2 ( @appendcontext2list ) {
 822   init_regex_arr_ext(\@CONTEXT2CMDLIST, $appendcontext2);
 823 }
 824
 825
 826
 827
 828 # setting configuration variables
 829 @config=();
 830 foreach $config ( @configlist ) {
 831   if (-f $config ) {
 832     open(FILE,$config) or die ("Couldn't open configuration file $config: $!");
 833     while (<FILE>) {
 834       chomp;
 835       next if /^\s*#/ || /^\s*%/ || /^\s*$/ ;
 836       push (@config,$_);
 837     }
 838     close(FILE);
 839   }
 840   else {
 841 #    foreach ( split(",",$config) ) {
 842 #      push @config,$_;
 843 #    }
 844      push @config,split(",",$config)
 845   }
 846 }
 847 foreach $assign ( @config ) {
 848   $assign=~ m/\s*(\w*)\s*=\s*(\S*)\s*$/ or die "Illegal assignment $assign in configuration list (must be variable=value)";
 849   if ( $1 eq "MINWORDSBLOCK" ) { $MINWORDSBLOCK = $2; }
 850   elsif ( $1 eq "FLOATENV" ) { $FLOATENV = $2 ; }
 851   elsif ( $1 eq "PICTUREENV" ) { $PICTUREENV = $2 ; }
 852   elsif ( $1 eq "MATHENV" ) { $MATHENV = $2 ; }
 853   elsif ( $1 eq "MATHREPL" ) { $MATHREPL = $2 ; }
 854   elsif ( $1 eq "MATHARRENV" ) { $MATHARRENV = $2 ; }
 855   elsif ( $1 eq "MATHARRREPL" ) { $MATHARRREPL = $2 ; }
 856   elsif ( $1 eq "ARRENV" ) { $ARRENV = $2 ; }
 857   elsif ( $1 eq "COUNTERCMD" ) { $COUNTERCMD = $2 ; }
 858   else { die "Unknown variable $1 in assignment.";}
 859 }
 860
 861
 862 foreach $pkg ( @packagelist ) {
 863   map { $packages{$_}="" } split(/,/,$pkg) ;
 864 }
 865
 866 if ($showpreamble) {
 867   print "\nPreamble commands:\n";
 868   print $latexdiffpreamble ;
 869 }
 870
 871 if ($showsafe) {
 872   print "\nCommands safe within scope of $ADDOPEN $ADDCLOSE and $DELOPEN $DELCLOSE (unless excluded):\n";
 873   print_regex_arr(@SAFECMDLIST);
 874   print "\nCommands not safe within scope of $ADDOPEN $ADDCLOSE and $DELOPEN $DELCLOSE :\n";
 875   print_regex_arr(@SAFECMDEXCL);
 876 }
 877
 878 if ($showtext) {
 879   print "\nCommands with last argument textual (unless excluded) and safe in every context:\n";
 880   print_regex_arr(@TEXTCMDLIST);
 881   print "\nContext1 commands (last argument textual, command will be disabled in deleted passages, last argument will be shown as plain text):\n";
 882   print_regex_arr(@CONTEXT1CMDLIST);
 883   print "\nContext2 commands (last argument textual, command ant its argument will be disabled in deleted passages):\n";
 884   print_regex_arr(@CONTEXT2CMDLIST);
 885   print "\nExclude list of Commands with last argument not textual (overrides patterns above):\n";
 886   print_regex_arr(@TEXTCMDEXCL);
 887 }
 888
 889 if ($showconfig) {
 890   print "Configuration variables:\n";
 891   print "MINWORDSBLOCK=$MINWORDSBLOCK\n";
 892   print "FLOATENV=$FLOATENV\n";
 893   print "PICTUREENV=$PICTUREENV\n";
 894   print "MATHENV=$MATHENV\n";
 895   print "MATHREPL=$MATHREPL\n";
 896   print "MATHARRENV=$MATHARRENV\n";
 897   print "MATHARRREPL=$MATHARRREPL\n";
 898   print "ARRENV=$ARRENV\n";
 899   print "COUNTERCMD=$COUNTERCMD\n";
 900 }
 901 if ($showconfig || $showtext || $showsafe || $showpreamble) {
 902   exit 0; }
 903 if ( @ARGV != 2 ) {
 904   print STDERR "2 and only 2 non-option arguments required.  Write latexdiff -h to get help\n";
 905   exit(2);
 906 }
 907
# Are extra spaces between command arguments permissible?
# ($extraspace is spliced into the command patterns below; it is only non-empty with --allow-spaces)
my $extraspace;
if ($allowspaces) {
  $extraspace='\s*';
} else {
  $extraspace='';
}

# append context lists to text lists (as text property is implied)
push @TEXTCMDLIST, @CONTEXT1CMDLIST;
push @TEXTCMDLIST, @CONTEXT2CMDLIST;


# Patterns. These are used by some of the subroutines, too
# I can only define them down here because value of extraspace depends on an option
#
# $pat0..$pat4: text with balanced braces.  $pat0 contains no unescaped braces at all,
# each following pattern additionally allows brace groups of the preceding kind, so that
# $patN matches text with braces nested at most N levels deep.
  my $pat0 = '(?:[^{}]|\\\{|\\\})*';
  my $pat1 = '(?:[^{}]|\\\{|\\\}|\{'.$pat0.'\})*';
  my $pat2 = '(?:[^{}]|\\\{|\\\}|\{'.$pat1.'\})*'; #
  my $pat3 = '(?:[^{}]|\\\{|\\\}|\{'.$pat2.'\})*';
  my $pat4 = '(?:[^{}]|\\\{|\\\}|\{'.$pat3.'\})*';
# $brat0: content of an optional argument, i.e. text without unescaped square brackets
  my $brat0 = '(?:[^\[\]]|\\\[|\\\])*';

# latex quotation marks '' and ``
  my $quotemarks = '(?:\'\')|(?:\`\`)';
# single punctuation character, ASCII or not
# NOTE(review): the class also contains the digit 0; in $pat below the word pattern is tried
# before the punctuation pattern and already matches digits - confirm whether the 0 is intentional
  my $punct='[0.,\/\'\`:;\"\?\(\)\[\]!~\p{IsNonAsciiPunct}\p{IsNonAsciiS}]';
# number containing a decimal point (the digits on either side of the point are optional)
  my $number='-?\d*\.\d*';
  my $mathpunct='[+=<>\-\|]';
  my $and = '&';
# content of a (...) argument: digits, whitespace, minus sign, point and comma
  my $coords= '[\-.,\s\d]*';
# word: sequence of letters or accents followed by letter
  my $word='(?:[-\w\d*]|\\\\[\"\'\`~^][A-Za-z\*])+';
# \left or \right followed by its delimiter
  my $cmdleftright='\\\\(?:left|right)\s*(?:[()\[\]|]|\\\\(?:[|{}]|\w+))';

# command with alphanumeric name followed by any number of [..], {..} or (..) arguments
  my $cmdoptseq='\\\\[\w\d\*]+'.$extraspace.'(?:(?:\['.$brat0.'\]|\{'. $pat4 . '\}|\(' . $coords .'\))'.$extraspace.')*';
# command whose name is a single arbitrary character, followed by any number of [..] or {..} arguments
  my $oneletcmd='\\\\.(?:\['.$brat0.'\]|\{'. $pat4 . '\})*';
# inline math: $...$ or \(...\)
  my $math='\$(?:[^$]|\\\$)*?\$|\\\\[(].*?\\\\[)]';
#  my $math='\$(?:[^$]|\\\$)*\$';
# comment: from % up to and including the end of the line
  my $comment='%.*?\n';
# $pat matches one token together with the whitespace following it (and, for the first token
# of a string, the whitespace preceding it).  The alternatives are tried in the order given.
  my $pat=qr/(?:\A\s*)?(?:${and}|${quotemarks}|${number}|${word}|$cmdleftright|${cmdoptseq}|${math}|${oneletcmd}|${comment}|${punct}|${mathpunct}|\{|\})\s*/ ;
 946
 947
 948
# now we are done setting up and can start working
my ($oldfile, $newfile) = @ARGV;

# unless given with --encoding, the encoding is guessed from the new file only
$encoding=guess_encoding($newfile) unless defined($encoding);

# normalise every encoding name beginning with utf8 to plain utf8
$encoding = "utf8" if $encoding =~ m/^utf8/i ;
if (lc($encoding) eq "utf8" ) {
  binmode(STDOUT, ":utf8");
  binmode(STDERR, ":utf8");
}

$old=read_file_with_encoding($oldfile,$encoding);
$new=read_file_with_encoding($newfile,$encoding);




# reset time
exetime(1);
# split both files into preamble, body and the text following \end{document}
($oldpreamble,$oldbody,$oldpost)=splitdoc($old,'\\\\begin\{document\}','\\\\end\{document\}');


($newpreamble,$newbody,$newpost)=splitdoc($new,'\\\\begin\{document\}','\\\\end\{document\}');


# --flatten: replace \input and \include commands in the bodies by the content of the files
if ($flatten) {
  $oldbody=flatten($oldbody,$oldpreamble,$oldfile,$encoding);
  $newbody=flatten($newbody,$newpreamble,$newfile,$encoding);
}

if ( length $oldpreamble && length $newpreamble ) {
  @oldpreamble = split /\n/, $oldpreamble;
  @newpreamble = split /\n/, $newpreamble;

  # packages named with --packages take precedence over scanning the new preamble
  %packages=list_packages(@newpreamble) unless %packages;
  if (defined $packages{"hyperref"} ) {
    print STDERR "hyperref package detected.\n" if $verbose ;
    # rename the markup commands in the latexdiff preamble and append the HYPERREF preamble section
    $latexdiffpreamble =~ s/\{\\DIFadd\}/{\\DIFaddtex}/g;
    $latexdiffpreamble =~ s/\{\\DIFdel\}/{\\DIFdeltex}/g;
    $latexdiffpreamble .= join "\n",(extrapream("HYPERREF"),"");
  }
  # insert dummy first line such that line count begins with line 1 (rather than perl's line 0) - just so that line numbers inserted by linediff are correct
  unshift @newpreamble,'';
  unshift @oldpreamble,'';
  print STDERR "Differencing preamble.\n" if $verbose;
  @diffpreamble = linediff(\@oldpreamble, \@newpreamble);
  # remove dummy line again
  shift @diffpreamble;
  push @diffpreamble,$latexdiffpreamble;
  push @diffpreamble,'\begin{document}';
}
elsif ( !length $oldpreamble && !length $newpreamble ) {
  # neither file has a preamble: the output consists of the marked-up body only
  @diffpreamble=();
} else {
  print STDERR "Either both texts must have preamble or neither text must have the preamble.\n";
  exit(2);
}

if (defined $packages{"amsmath"} ) {
  print STDERR "amsmath package detected.\n" if $verbose ;
  $MATHARRREPL='align*';
}

print STDERR "Preprocessing body.  " if $verbose;
# NOTE(review): preprocess is presumably changing $oldbody and $newbody in place
# (only the lead-ins are taken from its return value) - confirm against its definition
my ($oldleadin,$newleadin)=preprocess($oldbody,$newbody);


# run difference algorithm
@diffbody=bodydiff($oldbody, $newbody);
$diffbo=join("",@diffbody);
print STDERR "(",exetime()," s)\n","Postprocessing body. \n " if $verbose;
# NOTE(review): the return value is not used, so postprocess presumably changes $diffbo in place - confirm
postprocess($diffbo);
# assemble the output: differenced preamble, lead-in of the new file, marked-up body
# and, if the files have a preamble, \end{document} followed by the remainder of the new file
$diffall =join("\n",@diffpreamble) ;
$diffall .= "$newleadin$diffbo" ;
$diffall .= "\\end{document}$newpost" if length $newpreamble ;
# the text is processed as perl character strings; encode it again for all other encodings
if ( lc($encoding) ne "utf8" && lc($encoding) ne "ascii" ) {
  print STDERR "Encoding output file to $encoding\n" if $verbose;
  $diffall=Encode::encode($encoding,$diffall);
  binmode STDOUT;
}
print $diffall;


print STDERR "(",exetime()," s)\n","Done.\n" if $verbose;
1033
1034
1035
## guess_encoding(filename)
## scans the beginning of filename for a call of the inputenc package of the form
## \usepackage[encoding]{inputenc}.  Comment lines are skipped; the search is given up
## after 21 non-comment lines.
## if found, return the option of this package (encoding), otherwise return ascii
## dies if the file cannot be opened
sub guess_encoding {
  my ($filename)=@_;
  my $i=0;
  # three-argument open with a lexical handle: the file name is taken literally (no
  # stripping of whitespace, no interpretation of characters such as > or |), and no
  # global handle is shared with other subroutines
  open (my $fh, '<', $filename) or die("Couldn't open $filename: $!");
  while (my $line=<$fh>) {
    next if $line =~ /^\s*%/;    # skip comment lines
    if ($line =~ m/\\usepackage\[(\w*?)\]\{inputenc\}/) {
      my $enc=$1;
      close($fh);
      return($enc);
    }
    last if (++$i > 20 ); # give up after too many non-comment lines
  }
  close($fh);
  return("ascii");
}
1055
1056
# $text=read_file_with_encoding($filename,$encoding)
# reads the complete file and returns its content as a single string
#  utf8:  the file is read through the :utf8 layer
#  ascii: the file is read as it is
#  any other encoding: the file is read as it is, then decoded with Encode::decode
# dies if the file cannot be opened
sub read_file_with_encoding {
  my ($filename, $encoding) = @_;
  my ($output);
  my $isutf8 = (lc($encoding) eq "utf8");
  my $isascii = (lc($encoding) eq "ascii");

  require Encode unless $isutf8 || $isascii;
  # always use the three-argument open with a lexical handle, so that the file name is taken
  # literally and no global file handle is shared with other parts of the program
  open (my $fh, ($isutf8 ? "<:utf8" : "<"), $filename) or die("Couldn't open $filename: $!");
  {
    local $/ ; # locally set record operator to undefined, ie. enable whole-file mode
    $output=<$fh>;
  }
  close $fh;
  unless ($isutf8 || $isascii) {
    print STDERR "Converting $filename from $encoding to utf8\n" if $verbose;
    $output=Encode::decode($encoding,$output);
  }
  return $output;
}
1080
# %packages=list_packages(@preamble)
# scans the arguments for \documentclass and \usepackage statements and constructs a hash
# whose keys are the included packages, and whose values are the associated optional arguments
# (empty string if there are none).
# A statement loading several packages at once, e.g. \usepackage{amsmath,hyperref},
# results in one key per package, and all statements of a line are taken into account.
sub list_packages {
  my (@preamble)=@_;
  my %packages=();
  foreach my $line ( @preamble ) {
    # get rid of comments (@preamble is a copy, the caller's lines are not changed)
    $line=~s/(?<!\\)%.*$// ;
    while ( $line =~ m/\\(?:documentclass|usepackage)(?:\[(.+?)\])?\{(.*?)\}/g ) {
      my $options = defined($1) ? $1 : "";
      foreach my $name ( split(/,/,$2) ) {
        # the names of a list may be surrounded by spaces
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        next unless length($name);
        $packages{$name}=$options;
      }
    }
  }
  return (%packages);
}
1101
# flatten($text,$preamble,$filename,$encoding)
# expands \input and \include commands within text and returns the expanded text.
# The files are looked up relative to the directory of $filename and read with
# read_file_with_encoding; the content of included files is not expanded any further.
# preamble is scanned for includeonly commands: if there is one, only the \include
# commands for the files named there are expanded
# encoding is the encoding
sub flatten {
  my ($text,$preamble,$filename,$encoding)=@_;
  my ($includeonly,$dirname);
  require File::Basename ;
  require File::Spec ;
  $dirname = File::Basename::dirname($filename);

  # literal braces are escaped in all patterns: unescaped ones are deprecated or
  # even rejected by newer versions of perl
  if ( ($includeonly) = ($preamble =~ m/\\includeonly\{(.*?)\}/ ) ) {
    # turn the comma-separated list into a regex alternation
    $includeonly =~ s/,/|/g;
  } else {
    $includeonly = '.*?';
  }

  print STDERR "DEBUG: includeonly $includeonly\n" if $debug;

  # $1 is set for \input, $2 for \include
  $text=~s/\\input\{(.*?)\}|\\include\{(${includeonly}(?:\.tex)?)\}/_flatten_expand($dirname,$1,$2,$encoding)/exg;

  return($text);
}

# _flatten_expand($dirname,$inputname,$includename,$encoding)
# helper of flatten: returns the replacement text for one \input ($inputname defined)
# or \include ($includename defined) command
sub _flatten_expand {
  # the arguments are copied before any other pattern match is carried out, such that
  # the decision between \input and \include does not depend on the state of $1 and $2
  my ($dirname,$inputname,$includename,$encoding)=@_;
  my $fname = defined($includename) ? $includename : $inputname;
  # add tex extension unless there is a three letter extension already
  $fname .= ".tex" unless $fname =~ m|\.\w{3}|;
  my $path=File::Spec->catfile($dirname,$fname);
  print STDERR "Include file $fname\n" if $verbose;
  print STDERR "DEBUG looking for file $path\n" if $debug;
  # content of file becomes replacement value.  An empty file is a valid (empty)
  # replacement, therefore test for definedness rather than for truth
  my $replacement=read_file_with_encoding($path, $encoding);
  die "Couldn't find file $path: $!" unless defined($replacement);
  # add \newpage if the command was include
  my $newpage=(defined($includename)? " \\newpage " : "") ;
  return "$newpage$replacement$newpage";
}
1137
1138
# print_regex_arr(@arr)
# prints the patterns of a regex array to stdout, separated by spaces and followed by a
# newline, without the wrapper put in by perl when a compiled regex is turned into a string
sub print_regex_arr {
  my $dumstring;
  # perl generates the string (?-xism:^ref$) (before version 5.14) or (?^:^ref$)
  # (version 5.14 and later) for the quoted regex ^ref$
  $dumstring = join(" ",@_);
  # remove either form of the wrapper and the ^,$ marks before output
  $dumstring =~ s/\(\?(?:\^[a-z]*|[a-z]*-[a-z]*):\^(.*?)\$\)/$1/g;
  print $dumstring,"\n";
}
1147
1148
# @lines=extrapream($type,...)
# returns the lines of the preamble extension for all arguments
# Each argument is either the name of an existing file, whose lines are taken as they are,
# or the name of an internal style; in the latter case the lines are read from the
# appendix (DATA handle, end of file after __END__ token), beginning with the line
# starting with "%DIF <TYPE>" up to the line starting with "%DIF END <TYPE>".
# All lines are marked with a trailing "%DIF PREAMBLE" unless they contain this mark already.
# Exits with status 2 if an internal style does not exist.
sub extrapream {
  my @retval=("%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF") ;

  while (@_) {
    my $type=shift ;
    if ( -f $type ) {
      # three-argument open with a lexical handle, which is closed after reading
      open (my $fh,'<',$type) or die "Cannot open preamble file $type: $!";
      print STDERR "Reading preamble file $type\n" if $verbose ;
      while (my $line=<$fh>) {
        chomp $line;
        if ( $line =~ m/%DIF PREAMBLE/ ) {
          push (@retval,$line);
        } else {
          push (@retval,"$line %DIF PREAMBLE");
        }
      }
      close($fh);
    }
    else {    # not (-f $type)
      my $copy=0;
      $type=uc($type);   # upcase argument
      print STDERR "Preamble Internal Type $type\n" if $verbose;
      # \Q..\E: the style name is matched literally, even if it contains regex special characters
      while (my $line=<DATA>) {
        if ( $line =~ m/^%DIF \Q$type\E/ ) {
          $copy=1; }
        elsif ( $line =~ m/^%DIF END \Q$type\E/ ) {
          last; }
        chomp $line;
        push (@retval,"$line %DIF PREAMBLE") if $copy;
      }
      if ( $copy == 0 ) {
        print STDERR "\nPreamble style $type not implemented.\n";
        print STDERR "Write latexdiff -h to get help with available styles\n";
        exit(2);
      }
      seek DATA,0,0;    # rewind DATA handle to file begin
    }
  }
  push (@retval,"%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF")  ;
  return @retval;
}
1193
1194
# ($part1,$part2,$part3)=splitdoc($text,$word1,$word2)
# splits $text into 3 parts at $word1 and $word2 (both are regular expressions);
# the words themselves are not part of any of the returned parts.
# Occurrences which follow a % character on the same line are not taken into account.
# if neither $word1 nor $word2 exist, $part1 and $part3 are empty, $part2 is $text
# If only $word1 or $word2 exist but not the other, the subroutine dies with an error message.

# NB this version avoids $` and $' for performance reason although it only makes a tiny difference
# (in one test gain a tenth of a second for a 30s run)
sub splitdoc {
  my ($text,$word1,$word2)=@_;

  # without $word1 the complete text is the middle part, but then $word2 must be missing, too
  unless ( $text =~ m/(^[^%]*)($word1)/mg ) {
    die "$word2 present but not $word1." if ( $text =~ m/(^[^%]*)$word2/ms );
    return ("",$text,"");
  }
  # pos is the position just after $word1, $2 is the text matched by $word1
  my $after1=pos $text;
  my $head=substr($text,0,$after1-length($2));
  my $remainder=substr($text,$after1);

  # the same procedure for $word2 in the remainder of the text
  $remainder =~ m/(^[^%]*)($word2)/mg
    or die "$word1 and $word2 not in the correct order or not present as a pair." ;
  my $after2=pos $remainder;
  return ($head,
          substr($remainder,0,$after2-length($2)),
          substr($remainder,$after2));
}
1225
1226
1227
1228
1229
# @diffwords=bodydiff($old,$new)
# $old and $new are the bodies of the old and the new document.
# Both are split into tokens (splitlatex); the token sequences are prepared by pass1
# and then handed to pass2, whose result is returned.
# Progress messages are written to STDERR in verbose mode.
sub bodydiff {
  my ($oldwords, $newwords) = @_;

  if ($verbose) {
    print STDERR "(",exetime()," s)\n","Splitting into latex tokens \n";
    print STDERR "Parsing $oldfile \n";
  }
  my @oldtokens = splitlatex($oldwords);
  print STDERR "Parsing $newfile \n" if $verbose;
  my @newtokens = splitlatex($newwords);

  print STDERR "(",exetime()," s)\n","Pass 1: Expanding text commands and merging isolated identities with changed blocks  " if $verbose;
  # pass1 changes both token sequences in place
  pass1(\@oldtokens, \@newtokens);

  print STDERR "(",exetime()," s)\n","Pass 2: inserting DIF tokens and mark up.  " if $verbose;
  my @marked=pass2(\@oldtokens, \@newtokens);

  return(@marked);
}
1252
1253
1254
1255
# @words=splitlatex($string)
# split string according to latex rules
# Each element of words is either
# a word (including trailing spaces and punctuation)
# a latex command
# If the tokens found by the fast method do not add up to the complete string, a warning is
# printed (unless --ignore-warnings is set) and the string is parsed again token by token;
# characters which do not belong to any token are then put in as tokens of their own, such
# that the returned elements joined together always give back $string.
sub splitlatex {
  my ($string) = @_ ;
  my @retval=($string =~ m/$pat/osg);

  if (length($string) != length(join("",@retval))) {
    print STDERR "\nWARNING: Inconsistency in length of input string and parsed string:\n     This often indicates faulty or non-standard latex code.\n     In many cases you can ignore this and the following warning messages.\n Note that character numbers in the following are counted beginning after \\begin{document} and are only approximate." unless $ignorewarnings;
    print STDERR "DEBUG Original length ",length($string),"  Parsed length ",length(join("",@retval)),"\n" if $debug;
    print STDERR "DEBUG Input string:  |$string|\n" if (length($string)<500) && $debug;
    print STDERR "DEBUG Token parsing: |",join("+",@retval),"|\n" if (length($string)<500) && $debug ;
    @retval=();
    # slow way only do this if other m//sg method fails
    my $last = 0;    # position just after the previous token
    while ( $string =~ m/$pat/osg ) {
      my $match=$&;
      my $pos=pos($string);
      if ($last + length($match) != $pos  ) {
        # there are characters between the previous and the current token
        # $dum: the gap and the current token with up to $offset characters of context on both sides
        my $offset=30<$last ? 30 : $last;
        my $dum=substr($string,$last-$offset,$pos-$last+2*$offset);
        my $dum1=$dum;
        $dum1 =~ s/\n/ /g;
        unless ($ignorewarnings) {
          print STDERR "\n$dum1\n";
          # the marked region begins after the $offset characters of leading context
          print STDERR " " x $offset,"^" x ($pos-$last)," " x 30,"\n";
          print STDERR "Missing characters near word " . (scalar @retval) . " character index: " . $last . "-" .  $pos . " Length: " . length($match) . " Match: |$match| (expected match marked above).\n";
        }
          # put in missing characters `by hand'
        push (@retval, substr($dum,$offset,$pos-$last-length($match)));
#       Note: there seems to be a bug in substr with utf8 that made the following line output substr which were too long,
#             using dum instead appears to work
#       push (@retval, substr($string,$last, pos($string)-$last-length($match)));
      }
      push (@retval, $match);
      $last=$pos;
    }
    # characters after the last token are missing from the token list, too:
    # put them in as the final token instead of silently dropping them
    if ($last < length($string)) {
      my $rest=substr($string,$last);
      unless ($ignorewarnings) {
        my $shown=$rest;
        $shown =~ s/\n/ /g;
        print STDERR "\nMissing characters after word " . (scalar @retval) . " character index: " . $last . "-" . length($string) . " : |$shown|\n";
      }
      push (@retval, $rest);
    }

  }
  return @retval;
}
1301
1302
# pass1( \@seq1,\@seq2)
# Look for differences between seq1 and seq2.
# Where a common-subsequence block is flanked by deleted or appended blocks,
# and is shorter than $MINWORDSBLOCK words it is appended
# to the last deleted or appended word.  If the block contains tokens other than words
# or punctuation it is not merged.
# Deleted or appended block consisting of words and safe commands only are
# also merged, to prevent break-up in pass2 (after previous isolated words have been removed)
# If there are commands with textual arguments (e.g. \caption) both in corresponding
# appended and deleted blocks split them such that the command and opening bracket
# are one token, then the rest is split up following standard rules, and the closing
# bracket is a separate token, ie. turn
# "\caption{This is a textual argument}" into
# ("\caption{","This ","is ","a ","textual ","argument","}")
# No return value.  Destructively changes sequences
#
# Implementation: the sequences are traversed once with traverse_sequences.  The callbacks
# below only collect instructions in @$todo; the instructions are carried out after the traversal.
sub pass1 {
  my $seq1 = shift ;
  my $seq2 = shift ;

  # original lengths: needed to address elements relative to the end of the sequences later on
  my $len1 = scalar @$seq1;
  my $len2 = scalar @$seq2;
  # a word: letters and simple punctuation only, optionally followed by whitespace or ~
  my $wpat=qr/^(?:[a-zA-Z.,'`:;?()!]*)[\s~]*$/;   #'

  my ($last1,$last2)=(-1,-1) ;   # index of the most recently deleted/appended token (-1: none)
  my $cnt=0;                     # number of tokens in the current unchanged block
  my $block=[];                  # tokens of the current unchanged block
  my $addblock=[];               # [token,index] pairs of the current appended block
  my $delblock=[];               # [token,index] pairs of the current deleted block
  my $todo=[];                   # merge and split instructions
  my $instruction=[];
  my $i;
  my (@delmid,@addmid,@dummy);

  my ($addcmds,$delcmds,$matchindex);
  my ($addtextblocks,$deltextblocks);
  # token and block counts, only used for the summary printed in verbose mode
  my ($addtokcnt,$deltokcnt,$mattokcnt)=(0,0,0);
  my ($addblkcnt,$delblkcnt,$matblkcnt)=(0,0,0);

  # common part of the discard and add callbacks: if an unchanged block has just ended, decide
  # whether it is to be merged with the preceding changed tokens, then reset the block state
  my $adddiscard = sub {
                      if ($cnt > 0 ) {
                        $matblkcnt++;
                        # just after an unchanged block
#                       print STDERR "Unchanged block $cnt, $last1,$last2 \n";
                        # merge only if every token of the block is a word, or a safe command
                        # whose arguments consist of less than 3 space-separated pieces
                        if ($cnt < $MINWORDSBLOCK
                            && $cnt==scalar (
                                     grep { /^$wpat/ || ( /^\\([\w\d\*]+)((?:\[$brat0\]|\{$pat4\})*)/o
                                                           && iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)
                                                           && scalar(@dummy=split(" ",$2))<3 ) }
                                             @$block) )  {
                          # merge identical blocks shorter than $MINWORDSBLOCK
                          # and only containing ordinary words
                          # with preceding different word
                          # We cannot carry out this merging immediately as this
                          # would change the index numbers of seq1 and seq2 and confuse
                          # the algorithm, instead we store in @$todo where we have to merge
                          push(@$todo, [ $last1,$last2,$cnt,@$block ]);
                        }
                        $block = [];
                        $cnt=0; $last1=-1; $last2=-1;
                      }
                    };
  # callback for a token only in seq1 (deleted); $_[0] is its index in seq1
  # (&$adddiscard without parentheses hands on the current @_)
  my $discard=sub { $deltokcnt++;
                    &$adddiscard; #($_[0],$_[1]);
                    push(@$delblock,[ $seq1->[$_[0]],$_[0] ]);
                    $last1=$_[0] };

  # callback for a token only in seq2 (appended); $_[1] is its index in seq2
  my $add =   sub { $addtokcnt++;
                    &$adddiscard; #($_[0],$_[1]);
                    push(@$addblock,[ $seq2->[$_[1]],$_[1] ]);
                    $last2=$_[1] };

  # callback for a token present in both sequences
  my $match = sub { $mattokcnt++;
                    if ($cnt==0) {   # first word of matching sequence after changed sequence or at beginning of word sequence
                      # process the deleted and appended blocks collected since the last unchanged block
                      $deltextblocks = extracttextblocks($delblock);
                      $delblkcnt++ if scalar @$delblock;
                      $addtextblocks = extracttextblocks($addblock);
                      $addblkcnt++ if scalar @$addblock;

                      $delcmds = extractcommands($delblock);
                      $addcmds = extractcommands($addblock);
                      # keygen(third argument of _longestCommonSubsequence) implies to sort on command (0th elements of $addcmd elements)
                      # the calling format for longestCommonSubsequence has changed between versions of
                      # Algorithm::Diff so we need to check which one we are using
                      if ( $algodiffversion  > 1.15 ) {
                        ### Algorithm::Diff 1.19
                        $matchindex=Algorithm::Diff::_longestCommonSubsequence($delcmds,$addcmds, 0, sub { $_[0]->[0] } );
                      } else {
                        ### Algorithm::Diff 1.15
                        $matchindex=Algorithm::Diff::_longestCommonSubsequence($delcmds,$addcmds, sub { $_[0]->[0] } );
                      }

                      # $matchindex->[$i] is the index in $addcmds of the command paired with
                      # deleted command $i (undefined if there is none).
                      # NB $j is not declared in this subroutine, i.e. it is the variable of the enclosing file scope
                      for ($i=0 ; $i<=$#$matchindex ; $i++) {
                        if (defined($matchindex->[$i])){
                          $j=$matchindex->[$i];
                          # split the textual arguments of both commands into tokens
                          @delmid=splitlatex($delcmds->[$i][3]);
                          @addmid=splitlatex($addcmds->[$j][3]);
                          # first queue the text blocks in front of the command, then the split instruction
                          # (count -1) for the command itself; same for the appended command below
                          while (scalar(@$deltextblocks)  && $deltextblocks->[0][0]<$delcmds->[$i][1]) {
                            my ($index,$block,$cnt)=@{ shift(@$deltextblocks) };
                            push(@$todo, [$index,-1,$cnt,@$block]);
                          }
                          push(@$todo, [ $delcmds->[$i][1],-1,-1,$delcmds->[$i][2],@delmid,$delcmds->[$i][4]]);

                          while (scalar(@$addtextblocks) && $addtextblocks->[0][0]<$addcmds->[$j][1]) {
                            my ($index,$block,$cnt)=@{ shift(@$addtextblocks) };
                            push(@$todo, [-1,$index,$cnt,@$block]);
                          }
                          push(@$todo, [ -1,$addcmds->[$j][1],-1,$addcmds->[$j][2],@addmid,$addcmds->[$j][4]]);
                        }
                      }
                      # mop up remaining textblocks
                      while (scalar(@$deltextblocks)) {
                        my ($index,$block,$cnt)=@{ shift(@$deltextblocks) } ;
                        push(@$todo, [$index,-1,$cnt,@$block]);
                      }
                      while (scalar(@$addtextblocks)) {
                        my ($index,$block,$cnt)=@{ shift(@$addtextblocks) };
                        push(@$todo, [-1,$index,$cnt,@$block]);
                      }

                      $addblock=[];
                      $delblock=[];
                    }
                    # the unchanged block is built from the tokens of the new sequence
                    push(@$block,$seq2->[$_[1]]);
                    $cnt++  };

  # tokens are compared after replacing every run of whitespace by two spaces
  # (leading and trailing whitespace is removed)
  my $keyfunc = sub { join("  ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );


  # now carry out the merging/splitting.  Refer to elements relative from
  # the end (with negative indices) as these offsets don't change before the instruction is executed
  # cnt>0: merged small unchanged groups with previous changed blocks
  # cnt==-1: split textual commands into components
  foreach $instruction ( @$todo) {
    ($last1,$last2,$cnt,@$block)=@$instruction ;
    if ($cnt>=0) {
      # replace the changed token and the $cnt tokens following it by their concatenation
      splice(@$seq1,$last1-$len1,1+$cnt,join("",$seq1->[$last1-$len1],@$block)) if $last1>=0;
      splice(@$seq2,$last2-$len2,1+$cnt,join("",$seq2->[$last2-$len2],@$block)) if $last2>=0;
    } else {
      # replace the command token by its components
      splice(@$seq1,$last1-$len1,1,@$block) if $last1>=0;
      splice(@$seq2,$last2-$len2,1,@$block) if $last2>=0;
    }
  }

  if ($verbose) {
    print STDERR "\n";
    print STDERR "  $mattokcnt matching  tokens in $matblkcnt blocks.\n";
    print STDERR "  $deltokcnt discarded tokens in $delblkcnt blocks.\n";
    print STDERR "  $addtokcnt appended  tokens in $addblkcnt blocks.\n";
  }
}
1455
1456
# extracttextblocks(\@blockindex)
# $blockindex has the following format
# [ [ token1, index1 ], [token2, index2],.. ]
# where index refers to the index in the original old or new word sequence
# Returns: reference to an array of the form
# [[ $index, $textblock, $cnt ], ..
# where $index index of block to be merged
#       $textblock contains all the words to be merged with the word at $index (but does not contain this word)
#       $cnt   is length of block
# A text block is a run of consecutive tokens which are words or safe commands without
# textual argument; runs consisting of a single token are not returned.
#
# requires: iscmd
#
sub extracttextblocks {
  my $block=shift;
  my $wpat=qr/^(?:[a-zA-Z.,'`:;?()!]*)[\s~]*$/;  #'
  my @found=();
  my $start=-1;     # index of the first token of the current text block, -1 outside of a text block
  my @pending=();   # tokens following the first token of the current text block

  foreach my $entry ( @$block ) {
    my ($token,$index)=@$entry;
    # text is either a plain word or a command which can be treated as text
    my $istext = ( $token =~ /$wpat/ ||  ( $token =~/^\\([\w\d\*]+)((?:${extraspace}\[$brat0\]${extraspace}|${extraspace}\{$pat4\})*)/o
                                           && iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL)
                                           && !iscmd($1,\@TEXTCMDLIST,\@TEXTCMDEXCL)));
    unless ($istext) {
      # a non-text token ends the current text block; store it if there is anything to merge
      push(@found,[ $start, [ @pending ], scalar(@pending) ]) if @pending;
      @pending=();
      $start=-1;
      next;
    }
    if ($start<0) {
      # first token of a new text block: only its index is remembered
      $start=$index;
    } else {
      push(@pending, $token);
    }
  }
  # the last text block may extend to the end of the input
  push(@found,[ $start, [ @pending ], scalar(@pending) ]) if @pending;
  return(\@found);
}
1506
1507
1508
# extractcommands( \@blockindex )
# Picks out those tokens which are text commands, and splits each into its components.
# $blockindex has the following format
#   [ [ token1, index1 ], [ token2, index2 ], .. ]
# where index refers to the index in the original old or new word sequence.
# Returns: reference to an array of the form
#   [ [ "\cmd1", index, "\cmd1[optarg]{arg1}{", "arg2" ,"} " ], ..
# where index is just taken from the input array.
# Only commands matching TEXTCMDLIST (and not TEXTCMDEXCL) whose last argument is a
# braced argument are reported; the closing brace in the last component is
# replaced by \RIGHTBRACE.
#
# requires: iscmd
#
sub extractcommands {
  my $block=shift;
  my @found=();

  foreach my $entry (@$block) {
    my ($token,$index)=@$entry;
    # check if token is an alphanumeric command sequence with at least one non-optional argument
    # \cmd[...]{...}{last argument}
    # The pattern captures, in this order:
    #   $open:    \cmd[...]{...}{
    #   $cmd:     cmd   (command name without the backslash)
    #   $mid:     last argument
    #   $closing: }  + trailing spaces
    my ($open,$cmd,$mid,$closing) =
      ( $token =~ m/^(\\([\w\d\*]+)(?:${extraspace}\[$brat0\]|${extraspace}\{$pat4\})*${extraspace}\{)($pat4)(\}\s*)$/so )
        or next;
    next unless iscmd($cmd,\@TEXTCMDLIST,\@TEXTCMDEXCL);

    $closing =~ s/\}/\\RIGHTBRACE/ ;
    push(@found,[ $cmd,$index,$open,$mid,$closing ]);
  }
  return \@found;
}
1544
# iscmd($cmd,\@regexarray,\@regexexcl)
# Returns 1 if $cmd matches (in full) at least one of the patterns in
# @regexarray and none of the patterns in @regexexcl, otherwise returns 0.
# Each pattern is anchored at both ends before matching, so a pattern has to
# describe the complete command name.
sub iscmd {
  my ($cmd,$regexar,$regexexcl)=@_;
  my $included=0;
  # lexical loop variables: the sub no longer touches the package variable $pat
  foreach my $pat ( @$regexar ) {
    if ( $cmd =~ m/^${pat}$/ ) {
      $included=1 ;
      last;
    }
  }
  return 0 unless $included;
  # an exclusion pattern overrides any match found above
  foreach my $pat ( @$regexexcl ) {
    return 0 if ( $cmd =~ m/^${pat}$/ );
  }
  return 1;
}
1563
1564
# pass2( \@seq1,\@seq2)
# Look for differences between seq1 and seq2 (using traverse_sequences).
# Tokens only in seq1 are collected in a deleted hunk, tokens only in seq2 in an added hunk.
# Whenever a matching token is found (and once more at the end), pending hunks are
# passed through marktags, which marks begin and end of deleted and appended
# sequences with the tags $DELOPEN and $DELCLOSE and $ADDOPEN and $ADDCLOSE, respectively;
# the deleted hunk is always written before the added hunk.
# Commands in deleted hunks are wrapped in $DELCMDOPEN, $DELCMDCLOSE, commands in
# added hunks are passed through unchanged (empty command tags).
# Tokens are compared after normalising white space, matching tokens are taken from seq2.
# Returns: list of tokens of the marked-up sequence
# If $verbose is set, token and block statistics are written to STDERR.
#
# requires: marktags, traverse_sequences
sub pass2 {
  my $seq1 = shift ;
  my $seq2 = shift ;

  my ($addtokcnt,$deltokcnt,$mattokcnt)=(0,0,0);
  my ($addblkcnt,$delblkcnt)=(0,0);

  my $retval = [];
  my $delhunk   = [];
  my $addhunk   = [];

  # write out (and count) pending hunks; used for every match and once after the traversal,
  # so that hunks at the very end of the sequences are counted as well
  my $flush = sub { if ( scalar @$delhunk ) {
                      $delblkcnt++;
                      # mark up changes, but comment out commands
                      push @$retval,marktags($DELMARKOPEN,$DELMARKCLOSE,$DELOPEN,$DELCLOSE,$DELCMDOPEN,$DELCMDCLOSE,$DELCOMMENT,$delhunk);
                      $delhunk = [];
                    }
                    if ( scalar @$addhunk ) {
                      $addblkcnt++;
                      # we mark up changes, but simply quote commands
                      push @$retval,marktags($ADDMARKOPEN,$ADDMARKCLOSE,$ADDOPEN,$ADDCLOSE,"","",$ADDCOMMENT,$addhunk);
                      $addhunk = [];
                    } };

  my $discard = sub { $deltokcnt++;
                      push ( @$delhunk, $seq1->[$_[0]]) };

  my $add = sub { $addtokcnt++;
                  push ( @$addhunk, $seq2->[$_[1]]) };

  my $match = sub { $mattokcnt++;
                    $flush->();
                    push(@$retval,$seq2->[$_[1]]) };

  # key function: normalise white space (such that tokens differing only in white space match)
  my $keyfunc = sub { join("  ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );
  # clear up unprocessed hunks
  $flush->();


  if ($verbose) {
    print STDERR "\n";
    print STDERR "  $mattokcnt matching  tokens. \n";
    print STDERR "  $deltokcnt discarded tokens in $delblkcnt blocks.\n";
    print STDERR "  $addtokcnt appended  tokens in $addblkcnt blocks.\n";
  }

  return(@$retval);
}
1622
# marktags($openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment,\@block)
# returns ($openmark,$open,$block,$close,$closemark) if @block contains no commands (except white-listed ones),
# braces, ampersands, or comments
# Comments are marked by inserting $comment after their leading %
# All other exceptions are excluded from the scope of open, close like this
# ($openmark, $open,...,$close, $opencmd,command, command,$closecmd, $open, ..., $close, $closemark)
# For text commands (TEXTCMDLIST) the command itself is kept and the interior of its
# last argument is marked up by a recursive call.
# If $opencmd begins with "%" marktags assumes it is operating on a deleted block, otherwise on an added block
# Note: @block is re-split with splitlatex and the array passed in is overwritten with the result.
#
# requires: iscmd, splitlatex
sub marktags {
  my ($openmark,$closemark,$open,$close,$opencmd,$closecmd,$comment,$block)=@_;
  my @marked=();
  my $textseen=0;   # becomes 1 when the first non-comment token is met (this is when $openmark is written)
  my $state=-1;     # -1: at beginning, or nothing to close
                    #  0: last token written is an ordinary word (inside $open .. $close)
                    #  1: last token written is a command (inside $opencmd .. $closecmd)
  my $indeleted=($opencmd =~ m/^%/);  # true if $opencmd is a comment, i.e. changed commands are simply commented out

  # split this block to flatten out sequences joined in pass1
  @$block=splitlatex(join "",@$block);

  foreach (@$block) {
    my $word=$_;

    if ( $word =~ s/^%/%$comment/ ) {
      # a comment: terminate a running command sequence, then write the marked comment
      if ($state==1) {
        push (@marked,$closecmd) ;
        $state=-1;
      }
      push (@marked,$word);
      next;
    }

    unless ($textseen) {
      push (@marked,$openmark);
      $textseen=1;
    }

    # Is the word a command or other significant token (and not in SAFECMDLIST)?
    # negative lookahead pattern (?!) in second test is put in to avoid matching \( .. \) patterns
    my $significant=( $word =~ /^[&{}\[\]]/ ) ? 1 : 0;
    if ( !$significant && $word =~ /^\\(?!\()([\w*@]*)/ ) {
      $significant=!iscmd($1,\@SAFECMDLIST,\@SAFECMDEXCL);
    }

    unless ($significant) {
      # just an ordinary word or word in SAFECMD
      push (@marked,$open) if $state==-1 ;
      push (@marked,$closecmd,$open) if $state==1 ;
      push (@marked,$word);
      $state=0;
      next;
    }

    ## same conditions as in subroutine extractcommands:
    # check if token is an alphanumeric command sequence with at least one non-optional argument
    # \cmd[...]{...}{last argument}
    # The pattern captures, in this order:
    #   $command:        \cmd[...]{...}{
    #   $commandword:    cmd
    #   $argument:       last argument
    #   $closingbracket: }  + trailing spaces
    my ($command,$commandword,$argument,$closingbracket);
    my $textcmd=0;
    if ( my @parts = ( $word =~ m/^(\\([\w\d\*]+)(?:${extraspace}\[$brat0\]|${extraspace}\{$pat4\})*${extraspace}\{)($pat4)(\}\s*)$/so ) ) {
      ($command,$commandword,$argument,$closingbracket)=@parts;
      # In a deleted block a context2 command is treated as an ordinary command
      # (i.e. it is commented out with $opencmd), because we do not want to disable it
      $textcmd= iscmd($commandword,\@TEXTCMDLIST,\@TEXTCMDEXCL)
                && !( $indeleted && iscmd($commandword,\@CONTEXT2CMDLIST, \@CONTEXT2CMDEXCL) );
    }

    unless ($textcmd) {
      # ordinary command
      push (@marked,$opencmd) if $state==-1 ;
      push (@marked,$close,$opencmd) if $state==0 ;
      $word =~ s/\n/\n${opencmd}/sg if $indeleted ;   # if opencmd is a comment, repeat this at the beginning of every line
      push (@marked,$word);
      $state=1;
      next;
    }

    # word is a text command - we mark up the interior of the word
    if ($state==1) {
      push (@marked,$closecmd) ;
    } elsif ($state==0) {
      push (@marked,$close) ;
    }
    my @argtext=splitlatex($argument);   # split textual argument into tokens
    # the argument is marked up recursively (but we do not need openmark and closemark there)
    if ( $indeleted && iscmd($commandword,\@CONTEXT1CMDLIST, \@CONTEXT1CMDEXCL) ) {
      # context1cmd in a deleted environment; delete command itself but keep last argument, marked up
      push (@marked,$opencmd);
      $command =~ s/\n/\n${opencmd}/sg ; # repeat opencmd at the beginning of each line
      # argument, note that the additional comment character is included
      # to suppress linebreak after opening parentheses, which is important
      # for latexrevise
      push (@marked,$command,"%\n{$AUXCMD\n",marktags("","",$open,$close,$opencmd,$closecmd,$comment,\@argtext),$closingbracket);
    } else {
      # normal textcmd or context1cmd in an added block:
      # command with initial arguments, marked-up final argument, and closing bracket
      push (@marked,$command,marktags("","",$open,$close,$opencmd,$closecmd,$comment,\@argtext),$closingbracket);
    }
    push (@marked,$AUXCMD,"\n") if $indeleted ;
    $state=-1 ;
  }

  # close whatever sequence is still open
  push (@marked,$close) if $state==0;
  push (@marked,$closecmd) if $state==1;

  push (@marked,$closemark) if ($textseen);
  return @marked;
}
1722
# preprocess($string, ..)
# carry out the following pre-processing steps for all arguments (which are modified in place):
# 1. Remove leading white-space
# 2. mark all first empty line (in block of several) with \PAR tokens
# 3. Convert all '\%' into '\PERCENTAGE ' to make parsing regular expressions easier
# 4. Convert all \verb|some verbatim text| commands (where | can be an arbitrary character)
#    into \verb{hash}
# 5. Convert \begin{verbatim} some verbatim text \end{verbatim} into \verbatim{hash}
# 6. Convert _n or _\cmd into \SUBSCRIPTNB{n} or \SUBSCRIPTNB{\cmd} and _{nnn} into \SUBSCRIPT{nnn}
# 7. Convert ^n or ^\cmd into \SUPERSCRIPTNB{n} or \SUPERSCRIPTNB{\cmd} and ^{nnn} into \SUPERSCRIPT{nnn}
# 8. a. Convert $$ $$ into \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR}
#    b. Convert \[ \] into \begin{SQUAREBRACKET} \end{SQUAREBRACKET}
# 9. Add final token STOP to the very end.  This is put in because the algorithm works better if the last token is identical.  This is removed again in postprocessing.
#
# NB: step 6 and 7 is likely to  convert some "_" inappropriately, e.g. in file
#     names or labels but it does not matter because they are converted back in the postprocessing step
# Returns: list with the leading white space removed in step 1 (one entry per argument)
#
# Literal braces in the patterns are escaped: an unescaped literal { in a regular expression is
# deprecated in newer versions of perl (see perldiag, "Unescaped left brace in regex").
#
# requires: tohash (verbatim text is stored in %verbhash)
sub preprocess {
  my @leadin=() ;
  for (@_) {
    # 1. strip leading white space (the pattern always matches, so $1 is always defined)
    s/^(\s*)//s;
    push(@leadin,$1);
    # 2. \PAR tokens
    s/\n(\s*?)\n((?:\s*\n)*)/\n$1\\PAR\n$2/g ;
    # 3. \%
    s/(?<!\\)\\%/\\PERCENTAGE /g ;  # (?<! is negative lookbehind assertion to prevent \\% from being converted
    # 4. and 5. replace verbatim text by its hash value
    s/(\\verb\*?)(\S)(.*?)\2/"${1}{". tohash(\%verbhash,"${2}${3}${2}") ."}"/esg;
    s/\\begin\{(verbatim\*?)\}(.*?)\\end\{\1\}/"\\${1}{". tohash(\%verbhash,"${2}") . "}"/esg;
    # 6. Convert _n or _\cmd into \SUBSCRIPTNB{n} or \SUBSCRIPTNB{\cmd} and _{nnn} into \SUBSCRIPT{nnn}
    s/_([^{\\]|\\\w+)/\\SUBSCRIPTNB{$1}/g ;
    s/_\{($pat4)\}/\\SUBSCRIPT{$1}/g ;
    # 7. Convert ^n or ^\cmd into \SUPERSCRIPTNB{n} or \SUPERSCRIPTNB{\cmd} and ^{nnn} into \SUPERSCRIPT{nnn}
    s/\^([^{\\]|\\\w+)/\\SUPERSCRIPTNB{$1}/g ;
    s/\^\{($pat4)\}/\\SUPERSCRIPT{$1}/g ;
    # 8a. Convert $$ $$ into \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR}
    s/\$\$(.*?)\$\$/\\begin{DOLLARDOLLAR}$1\\end{DOLLARDOLLAR}/sg;
    # 8b. Convert \[ \] into \begin{SQUAREBRACKET} \end{SQUAREBRACKET}
    s/(?<!\\)\\\[/\\begin{SQUAREBRACKET}/sg;
    s/\\\]/\\end{SQUAREBRACKET}/sg;
    # 9. add final token " STOP"
    $_ .= " STOP";
  }
  return(@leadin);
}
1766
#hashstring=tohash(\%hash,$string)
# creates a hash value based on string and stores the string in %hash under this value.
# The hash value is the decimal representation of the sum over all bytes of
# (position * byte value), with positions counted from 1 and bytes taken as signed chars.
# If the value is already in use for a different string, "-" is prepended until
# a key is found which is either unused or already holds the same string; a warning
# is issued in this case.  (Previously "-" was prepended only once, such that a third
# string with the same sum overwrote the second one.)
# Returns: the key under which $string is stored
sub tohash {
  my ($hash,$string)=@_;
  my ($sum,$i)=(0,1);
  my ($hstr);

  foreach my $val (unpack('c*',$string)) {
    $sum += $i*$val;
    $i++;
  }
  $hstr= "$sum";
  if (defined($hash->{$hstr}) && $string ne $hash->{$hstr}) {
    warn "Repeated hash value for verbatim mode in spite of different content.";
    # look for a free key, or the key which already holds this string
    while (defined($hash->{$hstr}) && $string ne $hash->{$hstr}) {
      $hstr="-$hstr";
    }
  }
  $hash->{$hstr}=$string;
  return($hstr);
}
1789
#string=fromhash(\%hash,$fromstring)
# looks up $fromstring in %hash and returns the string stored there
#string=fromhash(\%hash,$fromstring,$prependstring)
# as above, but $prependstring is inserted at the beginning of each line of the result
sub fromhash {
  my $hash=shift;
  my $hstr=shift;
  my $retstr=$hash->{$hstr};
  # anything left in @_ means a prepend string was passed
  return $retstr unless @_;
  my $prepend=$_[0];
  $retstr =~ s/^/$prepend/mg;
  return $retstr;
}
1802
1803
# postprocess($string, ..)
# carry out the following post-processing steps for all arguments (which are modified in place):
# *  Replace $ in comments by DOLLARDIF (undone in step 11)
# *  Remove STOP token from the end
# *  Replace \RIGHTBRACE by }
# 1. Check all deleted blocks:
#    a.where a deleted block contains a matching \begin and
#      \end environment (these will be disabled by a %DIFDELCMD statements), for selected environments enable
#      these commands again (such that for example displayed math in a deleted equation
#      is properly within math mode.  For math mode environments replace numbered equation
#      environments with their display only variety (so that equation numbers in new file and
#      diff file are identical).  Where the correct type of math environment cannot be determined
#      use a place holder MATHMODE
#    b.where one of the commands matching $COUNTERCMD is used as a DIFAUXCMD, add a statement
#      subtracting one from the respective counter to keep numbering consistent with new file
#    c.If in-line math mode contains array environment, enclose the whole environment in \mbox'es
#      (this is also done for added blocks)
# 2. Rename \begin and \end in comments to \BEGINDIF and \ENDDIF (undone in step 11)
# 3. Replace all MATHMODE environment commands by the correct environment to achieve matching
#    pairs
# 4. Remove DIFadd, DIFdel, DIFaddbegin , ... from picture environments
# 5. Convert DIFadd, DIFdel, DIFaddbegin , ... into FL varieties
#    within floats (environments matching $FLOATENV)
# 6. Remove empty %DIFDELCMD < lines
# 7. Expand hashes of verb and verbatim environments
# 8. Convert '\PERCENTAGE ' back into '\%'
# 9. remove all \PAR tokens
# 10. package specific processing:  endfloat: make sure \begin{figure} and \end{figure} are always
#     on a line by themselves, similarly for table environment
# 11. undo renaming of the \begin and \end and of $ in comments
# 12. Convert \begin{SQUAREBRACKET} \end{SQUAREBRACKET} into \[ \]
#     Convert \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} into $$ $$
# 13. Convert  \SUPERSCRIPTNB{n} into ^n  and  \SUPERSCRIPT{nn} into ^{nn}
# 14. Convert  \SUBSCRIPTNB{n} into _n  and  \SUBSCRIPT{nn} into _{nn}
#
# Returns: nothing
#
# Note have to manually synchronize substitution commands below and
# DIF.. command names in the header
#
# Literal braces in the patterns are escaped: an unescaped literal { in a regular expression is
# deprecated in newer versions of perl (see perldiag, "Unescaped left brace in regex").
#
# requires: fromhash (verbatim text is looked up in %verbhash)
sub postprocess {
  my ($begin,$len,$float,$delblock,$addblock);
  # second level blocks
  my ($begin2,$len2,$eqarrayblock,$mathblock);

  for (@_) {

    # change $'s in comments to something harmless
    1 while s/(%.*)\$/$1DOLLARDIF/mg ;

    # Remove final STOP token
    s/ STOP$//;
    # Replace \RIGHTBRACE by }
    s/\\RIGHTBRACE/}/g;
    # 1. Check all deleted blocks: where a deleted block contains a matching \begin and
    #    \end environment (these will be disabled by a %DIFDELCMD statements), enable
    #    these commands again (such that for example displayed math in a deleted equation
    #    is properly within math mode.  For math mode environments replace numbered equation
    #    environments with their display only variety (so that equation numbers in new file and
    #    diff file are identical
    # Each block is copied, modified, and spliced back in; pos is then set to the end of the
    # modified block so that the search continues behind it
    while ( m/\\DIFdelbegin.*?\\DIFdelend/sg ) {
      $len=length($&);
      $begin=pos($_) - $len;
      $delblock=$&;


      ### (.*?[^\n]?)\n? construct is necessary to avoid empty lines in math mode, which result in
      ### an error
      # displayed math environments
      $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHREPL}$AUXCMD\n$1$3\n\\end{$MATHREPL}$AUXCMD\n$4/sg;
      # also transform the opposite pair \end{displaymath} .. \begin{displaymath} but we have to be careful not to interfere with the results of the transformation in the line directly above
      $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end{MATHMODE}$AUXCMD\n$1$3\n\\begin{MATHMODE}$AUXCMD\n$4/sg;

      # now look for unpaired %DIFDELCMD < \begin{MATHENV}; if found add \begin{$MATHREPL} and insert \end{$MATHREPL}
      # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
      if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\begin{$MATHREPL}$AUXCMD\n/sg ) {
        $delblock =~ s/(\\DIFdelend$)/\\end{$MATHREPL}$AUXCMD\n$1/s ;
      }
      # now look for unpaired %DIFDELCMD < \end{MATHENV}; if found add \end{MATHMODE} and insert \begin{MATHMODE}
      # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
      if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{((?:$MATHENV)|SQUAREBRACKET)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\end{MATHMODE}$AUXCMD\n/sg ) {
        $delblock =~ s/(\\DIFdelend$)/\\begin{MATHMODE}$AUXCMD\n$1/s ;
      }


      # equation array environment
      $delblock=~ s/(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(\%DIFDELCMD < \s*\\end\{\2\})/\\begin{$MATHARRREPL}$AUXCMD\n$1$3\n\\end{$MATHARRREPL}$AUXCMD\n$4/sg;
      $delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))(.*?[^\n]?)\n?(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{\2\})/\\end{MATHMODE}$AUXCMD\n$1$3\n\\begin{MATHMODE}$AUXCMD\n$4/sg;

      # now look for unpaired %DIFDELCMD < \begin{MATHARRENV}; if found add \begin{$MATHARRREPL} and insert \end{$MATHARRREPL}
      # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
      if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\begin\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\begin{$MATHARRREPL}$AUXCMD\n/sg ) {
        $delblock =~ s/(\\DIFdelend$)/\\end{$MATHARRREPL}$AUXCMD\n$1/s ;
      }
      # now look for unpaired %DIFDELCMD < \end{MATHARRENV}; if found add \end{MATHMODE} and insert \begin{MATHMODE}
      # just before end of block; again we use look-behind assertion to avoid matching constructions which have already been converted
      if ($delblock=~ s/(?<!${AUXCMD}\n)(\%DIFDELCMD < \s*\\end\{($MATHARRENV)\}\s*?(?:$DELCMDCLOSE|\n))/$1\\end{MATHMODE}$AUXCMD\n/sg ) {
        $delblock =~ s/(\\DIFdelend$)/\\begin{MATHMODE}$AUXCMD\n$1/s ;
      }

      # parse $delblock for deleted and reinstated eqnarray* environments - within those reinstate \\ and & commands
      while ( $delblock =~ m/\\begin\Q{$MATHARRREPL}$AUXCMD\E\n.*?\n\\end\Q{$MATHARRREPL}$AUXCMD\E\n/sg ) {
        $len2=length($&);
        $begin2=pos($delblock) - $len2;
        $eqarrayblock=$&;
        # reinstate deleted & and \\ commands
        $eqarrayblock=~ s/(\%DIFDELCMD < \s*(\&|\\\\)\s*?(?:$DELCMDCLOSE|\n))/$1$2$AUXCMD\n/sg ;

        substr($delblock,$begin2,$len2)=$eqarrayblock;
        pos($delblock) = $begin2 + length($eqarrayblock);
      }



#    b.where one of the commands matching $COUNTERCMD is used as a DIFAUXCMD, add a statement
#      subtracting one from the respective counter to keep numbering consistent with new file
      $delblock=~ s/\\($COUNTERCMD)((?:${extraspace}\[$brat0\]${extraspace}|${extraspace}\{$pat4\})*\s*${AUXCMD}\n)/\\$1$2\\addtocounter{$1}{-1}${AUXCMD}\n/sg ;
#     c. If in-line math mode contains array environment, enclose the whole environment in \mbox'es
      while ( $delblock =~ m/($math)(\s*)/sg ) {
        $len2=length($&);
        $begin2=pos($delblock) - $len2;
        $mathblock="%\n\\mbox{$AUXCMD\n$1\n}$AUXCMD\n";
        next unless $mathblock =~ m/\{$ARRENV\}/ ;
        substr($delblock,$begin2,$len2)=$mathblock;
        pos($delblock) = $begin2 + length($mathblock);
      }

#     splice in modified delblock
      substr($_,$begin,$len)=$delblock;
      pos = $begin + length($delblock);
    }
    # make the array modification in added blocks
    while ( m/\\DIFaddbegin.*?\\DIFaddend/sg ) {
      $len=length($&);
      $begin=pos($_) - $len;
      $addblock=$&;
      while ( $addblock =~ m/($math)(\s*)/sg ) {
        $len2=length($&);
        $begin2=pos($addblock) - $len2;
        $mathblock="%\n\\mbox{$AUXCMD\n$1\n}$AUXCMD\n";
        next unless $mathblock =~ m/\{$ARRENV\}/ ;
        substr($addblock,$begin2,$len2)=$mathblock;
        pos($addblock) = $begin2 + length($mathblock);
      }

#     splice in modified addblock
      substr($_,$begin,$len)=$addblock;
      pos = $begin + length($addblock);
    }


    # 2. change begin and end commands  within comments such that they
    # don't disturb the pattern matching (if there are several \begin or \end in one line
    # this substitution is insufficient but that appears unlikely)
    s/(%.*)\\begin\{(.*)$/$1\\BEGINDIF\{$2/mg ;
    s/(%.*)\\end\{(.*)$/$1\\ENDDIF\{$2/mg ;

    # 3. Replace MATHMODE environments from step 1a above by the correct Math environment

    # The next line is complicated.  The negative look-ahead insertion makes sure that no \end{$MATHENV} (or other mathematical
    # environments) are between the \begin{$MATHENV} and \end{MATHMODE} commands. This is necessary as the minimal matching
    # is not globally minimal but only 'locally' (matching is beginning from the left side of the string)
    1 while s/\\begin\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)\}((?:.(?!(?:\\end\{(?:(?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)\}|\\begin\{MATHMODE\})))*?)\\end\{MATHMODE\}/\\begin{$1}$2\\end{$1}/s;
    1 while s/\\begin\{MATHMODE\}((?:.(?!\\end\{MATHMODE\}))*?)\\end\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)\}/\\begin{$2}$1\\end{$2}/s;
    # convert remaining \begin{MATHMODE} \end{MATHMODE} (and not containing & or \\ )into MATHREPL environments
    s/\\begin\{MATHMODE\}((?:(.(?!(?<!\\)\&|\\\\))*)?)\\end\{MATHMODE\}/\\begin{$MATHREPL}$1\\end{$MATHREPL}/sg;
    # others into MATHARRREPL
    s/\\begin\{MATHMODE\}(.*?)\\end\{MATHMODE\}/\\begin{$MATHARRREPL}$1\\end{$MATHARRREPL}/sg;

    # now look for AUXCMD math-mode pairs which have only comments (or empty lines between them), and remove the added commands
    s/\\begin\{((?:$MATHENV)|(?:$MATHARRENV)|SQUAREBRACKET)\}$AUXCMD\n((?:\s*%.[^\n]*\n)*)\\end\{\1\}$AUXCMD\n/$2/sg;


    # 4. Remove all mark up within picture environments (added text is kept, deleted text is dropped)
    while ( m/\\begin\{($PICTUREENV)\}.*?\\end\{\1\}/sg ) {
      $len=length($&);
      $begin=pos($_) - $len;
      $float=$&;
      $float =~ s/\\DIFaddbegin //g;
      $float =~ s/\\DIFaddend //g;
      $float =~ s/\\DIFadd\{($pat4)\}/$1/g;
      $float =~ s/\\DIFdelbegin //g;
      $float =~ s/\\DIFdelend //g;
      $float =~ s/\\DIFdel\{($pat4)\}//g;
      substr($_,$begin,$len)=$float;
      pos = $begin + length($float);
    }
    # 5. Convert DIFadd, DIFdel, DIFaddbegin , ... into FL varieties
    #    within floats (environments matching $FLOATENV)
    while ( m/\\begin\{($FLOATENV)\}.*?\\end\{\1\}/sg ) {
      $len=length($&);
      $begin=pos($_) - $len;
      $float=$&;
      $float =~ s/\\DIFaddbegin /\\DIFaddbeginFL /g;
      $float =~ s/\\DIFaddend /\\DIFaddendFL /g;
      $float =~ s/\\DIFadd\{/\\DIFaddFL{/g;
      $float =~ s/\\DIFdelbegin /\\DIFdelbeginFL /g;
      $float =~ s/\\DIFdelend /\\DIFdelendFL /g;
      $float =~ s/\\DIFdel\{/\\DIFdelFL{/g;
      substr($_,$begin,$len)=$float;
      pos = $begin + length($float);
    }

    # 6. remove empty DIFCMD < lines
    s/^\Q${DELCMDOPEN}\E\n//msg;

    # 7. Expand hashes of verb and verbatim environments (within DIFDELCMD comments each line of
    #    the verbatim text is prefixed with $DELCMDOPEN so that it does not leak out of the comment)
    s/(\\verb\*?)\{([-\d]*?)\}/"${1}". fromhash(\%verbhash,$2)/esg;
    s/${DELCMDOPEN}\\(verbatim\*?)\{([-\d]*?)\}/"${DELCMDOPEN}\\begin{${1}}".fromhash(\%verbhash,$2,$DELCMDOPEN)."${DELCMDOPEN}\\end{${1}}"/esg;
    s/\\(verbatim\*?)\{([-\d]*?)\}/"\\begin{${1}}".fromhash(\%verbhash,$2)."\\end{${1}}"/esg;
    # 8. Convert '\PERCENTAGE ' back into '\%'
    s/\\PERCENTAGE /\\%/g;
    # 9. remove all \PAR tokens (taking care to properly keep commented out PAR's
    # from introducing uncommented newlines - next line)
    s/(%DIF < )([^\n]*?)\\PAR\n/$1$2\n$1\n/sg;
    # convert PAR commands which are on a line by themselves
    s/\n(\s*?)\\PAR\n/\n\n/sg;
    # convert remaining PAR commands (which are preceded by non-white space characters, usually "}" ($ADDCLOSE)
    s/\\PAR\n/\n\n/sg;

    # 10. package specific processing:
    if ( defined($packages{"endfloat"})) {
      #endfloat: make sure \begin{figure} and \end{figure} are always
      #      on a line by themselves, similarly for table environment
      print STDERR "endfloat package detected.\n" if $verbose ;
      # eliminate whitespace before and after
      s/^(\s*)(\\(?:begin|end)\{(?:figure|table)\})(\s*)$/$2/mg;
      # split lines with remaining characters before float environment command
      s/^(.+)(\\(?:begin|end)\{(?:figure|table)\})/$1\n$2/mg;
      # split lines with remaining characters after float environment command
      s/(\\(?:begin|end)\{(?:figure|table)\})(.+)$/$1\n$2/mg;
    }
    # 11. undo renaming of the \begin and \end  and dollars in comments
    s/(%.*)\\BEGINDIF\{(.*)$/$1\\begin\{$2/mg ;
    s/(%.*)\\ENDDIF\{(.*)$/$1\\end\{$2/mg ;
    1 while s/(%.*)DOLLARDIF/$1\$/mg ;
    # 12. Convert \begin{SQUAREBRACKET} \end{SQUAREBRACKET} into \[ \]
    s/\\end\{SQUAREBRACKET\}/\\\]/sg;
    s/\\begin\{SQUAREBRACKET\}/\\\[/sg;
    #     Convert \begin{DOLLARDOLLAR} \end{DOLLARDOLLAR} into $$ $$
    s/\\begin\{DOLLARDOLLAR\}(.*?)\\end\{DOLLARDOLLAR\}/\$\$$1\$\$/sg;
    # 13. Convert  \SUPERSCRIPTNB{n} into ^n  and  \SUPERSCRIPT{nn} into ^{nn}
    s/\\SUPERSCRIPTNB\{($pat0)\}/^$1/g ;
    s/\\SUPERSCRIPT\{($pat4)\}/^{$1}/g ;
    # 14. Convert  \SUBSCRIPTNB{n} into _n  and  \SUBSCRIPT{nn} into _{nn}
    s/\\SUBSCRIPTNB\{($pat0)\}/_$1/g ;
    s/\\SUBSCRIPT\{($pat4)\}/_{$1}/g ;

  }
  # return only after ALL arguments have been processed
  return;
}
2061
2062
# @diffs=linediff(\@seq1, \@seq2)
# Line-by-line difference of two sequences of lines; changed lines are marked up like this
#%DIF mm-mmdnn
#%DIF < old deleted line(s)
#%DIF -------
#%DIF mmann-nn
#new appended line %DIF >
#%DIF -------
# A block containing both deleted and appended lines gets a header of the form
# %DIF mm-mmcnn-nn and a %DIF ------- line between the deleted and the appended lines.
# Lines are compared after normalising white space; matching lines are taken from seq2.
# A changed block at the very end of the sequences is written without header and trailer line.
# Returns: list of output lines in list context, reference to that list in scalar context
# Future extension: mark change explicitly
# Assumes: traverse_sequences traverses deletions before insertions in changed sequences
#          all line numbers relative to line 0 (first line of real file)
sub linediff {
  my ($seq1,$seq2)=@_;

  my @out=();        # output lines
  my @pending=();    # lines of the changed block currently being collected
  # type ('d', 'a' or 'c') and start indices (in seq1, seq2) of the pending block
  my ($kind,$from1,$from2)=('','',''); # dummy initialisation

  my $discard = sub {
    my ($i,$j)=@_;
    ($kind,$from1,$from2)=('d',$i,$j) unless scalar @pending;
    push(@pending, "%DIF < " . $seq1->[$i]);
  };

  my $add = sub {
    my ($i,$j)=@_;
    if (! scalar @pending) {
      ($kind,$from1,$from2)=('a',$i,$j);
    }
    elsif ( $kind eq 'd' ) {
      # first appended line after deleted lines: the block becomes a change
      $kind='c';
      $from2=$j;
      push(@pending, "%DIF -------");
    }
    push(@pending, $seq2->[$j] . " %DIF > " );
  };

  my $match = sub {
    my ($i,$j)=@_;
    if ( scalar @pending ) {
      # last lines belonging to the pending block
      my ($to1,$to2)=($i-1,$j-1);
      my $header;
      if ( $kind eq 'd' && $from1!=$to1 ) {
        $header = sprintf "%%DIF %d-%dd%d",$from1,$to1,$from2;
      }
      elsif ( $kind eq 'a' && $from2!=$to2 ) {
        $header = sprintf "%%DIF %da%d-%d",$from1,$from2,$to2;
      }
      elsif ( $kind eq 'c' ) {
        my $range1 = ($from1==$to1) ? "$from1" : $from1."-".$to1;
        my $range2 = ($from2==$to2) ? "$from2" : $from2."-".$to2;
        $header = sprintf "%%DIF %sc%s",$range1,$range2;
      }
      else {
        # single deleted or appended line
        $header = sprintf "%%DIF %d%s%d",$from1,$kind,$from2;
      }
      push @out, $header,@pending, "%DIF -------" ;
      @pending=();
    }
    push @out, $seq2->[$j];
  };

  # key function: remove multiple spaces (such that insertion or deletion of redundant white space is not reported)
  my $keyfunc = sub { join("  ",split(" ",shift())) };

  traverse_sequences($seq1,$seq2, { MATCH=>$match, DISCARD_A=>$discard, DISCARD_B=>$add }, $keyfunc );
  push @out, @pending if scalar @pending;

  return wantarray ? @out : \@out ;
}
2115
2116
2117
# init_regex_arr_data(\@array,"TOKEN")
# scans DATA file handle for the line "%%BEGIN TOKEN"
# then appends each following line not beginning with % into array
# (as an anchored regex, qr/^line$/) until the line "%%END TOKEN" is found.
# TOKEN is interpolated into the marker patterns as a regular expression.
sub init_regex_arr_data {
  my ($arr,$token)=@_;
  my $copy=0;     # true once the %%BEGIN line has been seen
  # read into a lexical variable: while (<DATA>) would overwrite the caller's $_
  while ( defined(my $line=<DATA>) ) {
    if ( $line =~ m/^%%BEGIN $token\s*$/ ) {
      $copy=1; }
    elsif ( $line =~ m/^%%END $token\s*/ )  {
      last; }
    chomp $line;
    # the %%BEGIN line itself starts with % and is therefore skipped here
    push (@$arr,qr/^$line$/) if ( $copy && $line !~ /^%/ ) ;
  }
  seek DATA,0,0;    # rewind DATA handle to file begin
}
2134
2135
# init_regex_arr_ext(\@array,$arg)
# fills array with regular expressions.
# if arg is a file name, then read in list of regular expressions from that file
# (one expression per line; empty lines and lines beginning with # or % are skipped)
# Otherwise treat arg as a comma separated list of regular expressions
# (a literal comma within an expression is written as \,)
# Each expression is appended to array as an anchored regex, qr/^expr$/.
# Dies if the file exists but cannot be opened.
sub init_regex_arr_ext {
  my ($arr,$arg)=@_;
  if ( -f $arg ) {
    # three-argument open: the file name is never interpreted as mode or command
    open(my $fh,'<',$arg) or die ("Couldn't open $arg: $!");
    # lexical line variable so that the caller's $_ is left untouched
    while ( defined(my $line=<$fh>) ) {
      chomp $line;
      next if $line =~ /^\s*#/ || $line =~ /^\s*%/ || $line =~ /^\s*$/ ;
      push (@$arr,qr/^$line$/);
    }
    close($fh);
  }
  else {
    # assume it is a comma-separated list of reg-ex
    # (split only at commas which are not escaped by a backslash)
    foreach my $regex (split(qr/(?<!\\),/,$arg)) {
      $regex =~ s/\\,/,/g;
      push (@$arr,qr/^$regex$/);
    }
  }
}
2161
# exetime()  returns the time elapsed since the previous call of exetime
# exetime(1) restarts the clock instead of reporting the elapsed time
# Time is taken from times() in scalar context.
my $lasttime=-1;   # time stamp of the previous call (file-scoped so that it persists between calls)
sub exetime {
  my ($reset)=@_;
  my $now=times();
  # reset requested: just remember the current time
  return $lasttime=$now if $reset;
  my $elapsed=$now-$lasttime;
  $lasttime=$lasttime+$elapsed;
  return $elapsed;
}
2180
2181
# usage()
# Terminates the program via die with the command line help text as message.
# $0 (the name under which the script was invoked) is interpolated into the text.
sub usage {
  die <<"EOF";
Usage: $0 [options] old.tex new.tex > diff.tex

Compares two latex files and writes tex code to stdout, which has the same
format as new.tex but has all changes relative to old.tex marked up or commented.

--type=markupstyle
-t markupstyle         Add code to preamble for selected markup style
                       Available styles: UNDERLINE CTRADITIONAL TRADITIONAL CFONT FONTSTRIKE INVISIBLE
                                         CHANGEBAR CCHANGEBAR CULINECHBAR CFONTCHBAR
                       [ Default: UNDERLINE ]

--subtype=markstyle
-s markstyle           Add code to preamble for selected style for bracketing
                       commands (e.g. to mark changes in  margin)
                       Available styles: SAFE MARGINAL DVIPSCOL
                       [ Default: SAFE ]

--floattype=markstyle
-f markstyle           Add code to preamble for selected style which
                       replace standard marking and markup commands within floats
                       (e.g., marginal remarks cause an error within floats
                       so marginal marking can be disabled thus)
                       Available styles: FLOATSAFE TRADITIONALSAFE IDENTICAL
                       [ Default: FLOATSAFE ]

--encoding=enc
-e enc                 Specify encoding of old.tex and new.tex. Typical encodings are
                       ascii, utf8, latin1, latin9.  A list of available encodings can be
                       obtained by executing
                       perl -MEncode -e 'print join ("\\n",Encode->encodings( ":all" )) ;'
                       [Default encoding is utf8 unless the first few lines of the preamble contain
                       an invocation "\\usepackage[..]{inputenc}" in which case the
                       encoding chosen by this command is assumed. Note that ASCII (standard
                       latex) is a subset of utf8]

--preamble=file
-p file                Insert file at end of preamble instead of auto-generating
                       preamble.  The preamble must define the following commands
                       \\DIFaddbegin,\\DIFaddend,\\DIFadd{..},
                       \\DIFdelbegin,\\DIFdelend,\\DIFdel{..},
                       and varieties for use within floats
                       \\DIFaddbeginFL,\\DIFaddendFL,\\DIFaddFL{..},
                       \\DIFdelbeginFL,\\DIFdelendFL,\\DIFdelFL{..}
                       (If this option is set -t, -s, and -f options
                       are ignored.)

--exclude-safecmd=exclude-file
--exclude-safecmd="cmd1,cmd2,..."
-A exclude-file
--replace-safecmd=replace-file
--append-safecmd=append-file
--append-safecmd="cmd1,cmd2,..."
-a append-file         Exclude from, replace or append to the list of regex
                       matching commands which are safe to use within the
                       scope of a \\DIFadd or \\DIFdel command.  The file must contain
                       one Perl-RegEx per line (Comment lines beginning with # or % are
                       ignored). A literal comma within the comma-separated list must be
                       escaped thus "\\,".  Note that the RegEx needs to match the whole of
                       the token, i.e., /^regex\$/ is implied and that the initial
                       "\\" of the command is not included. The --exclude-safecmd
                       and --append-safecmd options can be combined with the --replace-safecmd
                       option and can be used repeatedly to add cumulatively to the lists.

--exclude-textcmd=exclude-file
--exclude-textcmd="cmd1,cmd2,..."
-X exclude-file
--replace-textcmd=replace-file
--append-textcmd=append-file
--append-textcmd="cmd1,cmd2,..."
-x append-file         Exclude from, replace or append to the list of regex
                       matching commands whose last argument is text.  See
                       entry for --exclude-safecmd directly above for further details.

--replace-context1cmd=replace-file
--append-context1cmd=append-file
--append-context1cmd="cmd1,cmd2,..."
                       Replace or append to the list of regex matching commands
                       whose last argument is text but which require a particular
                       context to work, e.g. \\caption will only work within a figure
                       or table.  These commands behave like text commands, except when
                       they occur in a deleted section, when they are disabled, but their
                       argument is shown as deleted text.

--replace-context2cmd=replace-file
--append-context2cmd=append-file
--append-context2cmd="cmd1,cmd2,..."
                       As corresponding commands for context1.  The only difference is that
                       context2 commands are completely disabled in deleted sections, including
                       their arguments.


--config var1=val1,var2=val2,...
-c var1=val1,..        Set configuration variables.
-c configfile           Available variables:
                          MINWORDSBLOCK (integer)
                          FLOATENV (RegEx)
                          PICTUREENV (RegEx)
                          MATHENV (RegEx)
                          MATHREPL (String)
                          MATHARRENV (RegEx)
                          MATHARRREPL (String)
                          ARRENV (RegEx)
                          COUNTERCMD (RegEx)
                       This option can be repeated.


--packages=pkg1,pkg2,..
                       Tell latexdiff that .tex file is processed with the packages in list
                       loaded.  This is normally not necessary if the .tex file includes the
                       preamble, as the preamble is automatically scanned for \\usepackage commands.
                       Use of the --packages option disables automatic scanning, so if for any
                       reason package specific parsing needs to be switched off, use --packages=none.
                       The following packages trigger special behaviour:
                       endfloat hyperref amsmath
                       [ Default: scan the preamble for \\usepackage commands to determine
                         loaded packages.]

--show-preamble        Print generated or included preamble commands to stdout.

--show-safecmd         Print list of regex matching and excluding safe commands.

--show-textcmd         Print list of regex matching and excluding commands with text argument.

--show-config          Show values of configuration variables

--show-all             Show all of the above

   NB For all --show commands, no old.tex or new.tex file needs to be given, and no
      differencing takes place.

--allow-spaces         Allow spaces between bracketed or braced arguments to commands
                       [Default requires arguments to directly follow each other without
                                intervening spaces]

--flatten              Replace \\input and \\include commands within body by the content
                       of the files in their argument.  If \\includeonly is present in the
                       preamble, only those files are expanded into the document. However,
                       no recursion is done, i.e. \\input and \\include commands within
                       included sections are not expanded.  The included files are assumed to
                       be located in the same directories as the old and new master files,
                       respectively, making it possible to organise files into old and new directories.

--help
-h                     Show this help text.

--ignore-warnings      Suppress warnings about inconsistencies in length between input
                       and parsed strings and missing characters.

--verbose
-V                     Output various status information to stderr during processing.
                       Default is to work silently.

--version              Show version number.
EOF
}
2339
2340 =head1 NAME
2341
2342 latexdiff - determine and markup differences between two latex files
2343
2344 =head1 SYNOPSIS
2345
2346 B<latexdiff> [ B<OPTIONS> ] F<old.tex> F<new.tex> > F<diff.tex>
2347
2348 =head1 DESCRIPTION
2349
2350 Briefly, I<latexdiff> is a utility program to aid in the management of
2351 revisions of latex documents. It compares two valid latex files, here
2352 called C<old.tex> and C<new.tex>, finds significant differences
2353 between them (i.e., ignoring the number of white spaces and position
2354 of line breaks), and adds special commands to highlight the
2355 differences.  Where visual highlighting is not possible, e.g. for changes
2356 in the formatting, the differences are
2357 nevertheless marked up in the source.
2358
2359 The program treats the preamble differently from the main document.
2360 Differences between the preambles are found using line-based
2361 differencing (similarly to the Unix diff command, but ignoring white
2362 spaces).  A comment, "S<C<%DIF E<gt>>>" is appended to each added line, i.e. a
2363 line present in C<new.tex> but not in C<old.tex>.  Discarded lines
2364  are deactivated by prepending "S<C<%DIF E<lt>>>". Changed blocks are preceded  by
2365 comment lines giving information about line numbers in the original files.  Where there are insignificant
2366 differences, the resulting file C<diff.tex> will be similar to
2367 C<new.tex>.  At the end of the preamble, the definitions for I<latexdiff> markup commands are inserted.
2368 In differencing the main body of the text, I<latexdiff> attempts to
2369 satisfy the following guidelines (in order of priority):
2370
2371 =over 3
2372
2373 =item 1
2374
2375 If both C<old.tex> and C<new.tex> are valid LaTeX, then the resulting
2376 C<diff.tex> should also be valid LaTeX. (NB If a few plain TeX commands
2377 are used within C<old.tex> or C<new.tex> then C<diff.tex> is not
2378 guaranteed to work but usually will).
2379
2380 =item 2
2381
2382 Significant differences are determined on the level of
2383 individual words. All significant differences, including differences
2384 between comments should be clearly marked in the resulting source code
2385 C<diff.tex>.
2386
2387 =item 3
2388
2389 If a changed passage contains text or text-producing commands, then
2390 running C<diff.tex> through LaTeX should produce output where added
2391 and discarded passages are highlighted.
2392
2393 =item 4
2394
2395 Where there are insignificant differences, e.g. in the positioning of
2396 line breaks, C<diff.tex> should follow the formatting of C<new.tex>
2397
2398 =back
2399
2400 For differencing the same algorithm as I<diff> is used but words
2401 instead of lines are compared.  An attempt is made to recognize
2402 blocks which are completely changed such that they can be marked up as a unit.
2403 Comments are differenced line by line
2404 but the number of spaces within comments is ignored. Commands including
2405 all their arguments are generally compared as one unit, i.e., no mark-up
2406 is inserted into the arguments of commands.  However, for a selected
2407 number of commands (for example, C<\caption> and all sectioning
2408 commands) the last argument is known to be text. This text is
2409 split into words and differenced just as ordinary text (use options to
2410 show and change the list of text commands, see below). As the
2411 algorithm has no detailed knowledge of LaTeX, it assumes all pairs of
2412 curly braces immediately following a command (i.e. a sequence of
2413 letters beginning with a backslash) are arguments for that command.
2414 As a restriction to condition 1 above it is thus necessary to surround
2415 all arguments with curly braces, and to not insert
2416 extraneous spaces.  For example, write
2417
2418   \section{\textem{This is an emphasized section title}}
2419
2420 and not
2421
2422   \section {\textem{This is an emphasized section title}}
2423
2424 or
2425
2426   \section\textem{This is an emphasized section title}
2427
2428 even though all varieties are the same to LaTeX (but see
2429 B<--allow-spaces> option which allows the second variety).
2430
2431 For environments whose content does not conform to standard LaTeX or
2432 where graphical markup does not make sense all markup commands can be
2433 removed by setting the PICTUREENV configuration variable (set by
2434 default to C<picture> and C<DIFnomarkup> environments; see B<--config>
2435 option).  The latter environment (C<DIFnomarkup>) can be used to
2436 protect parts of the latex file where the markup results in illegal
2437 markup. You have to surround the offending passage in both the old and
2438 new file by C<\begin{DIFnomarkup}> and C<\end{DIFnomarkup}>. You must
2439 define the environment in the preambles of both old and new
2440 documents. I prefer to define it as a null-environment,
2441
2442 C<\newenvironment{DIFnomarkup}{}{}>
2443
2444 but the choice is yours.  Any markup within the environment will be
2445 removed, and generally everything within the environment will just be
2446 taken from the new file.
2447
2448 It is also possible to difference files which do not have a preamble.
2449  In this case, the file is processed in the main document
2450 mode, but the definitions of the markup commands are not inserted.
2451
2452 All markup commands inserted by I<latexdiff> begin with "C<\DIF>".  Added
2453 blocks containing words, commands or comments which are in C<new.tex>
2454 but not in C<old.tex> are marked by C<\DIFaddbegin> and C<\DIFaddend>.
2455 Discarded blocks are marked by C<\DIFdelbegin> and C<\DIFdelend>.
2456 Within added blocks all text is highlighted with C<\DIFadd> like this:
2457 C<\DIFadd{Added text block}>
2458 Selected `safe' commands can be contained in these text blocks as well
2459 (use options to show and change the list of safe commands, see below).
2460 All other commands as well as braces "{" and "}" are never put within
2461 the scope of C<\DIFadd>.  Added comments are marked by prepending
2462 "S<C<%DIF E<gt> >>".
2463
2464 Within deleted blocks text is highlighted with C<\DIFdel>.  Deleted
2465 comments are marked by prepending "S<C<%DIF E<lt> >>".  Non-safe command
2466 and curly braces within deleted blocks are commented out with
2467 "S<C<%DIFDELCMD E<lt> >>".
2468
2469
2470
2471 =head1 OPTIONS
2472
2473 =head2 Preamble
2474
2475 The following options determine the visual markup style by adding the appropriate
2476 command definitions to the preamble. See the end of this section for a description of
2477 available styles.
2478
2479 =over 4
2480
2481 =item B<--type=markupstyle> or
2482 B<-t markupstyle>
2483
2484 Add code to preamble for selected markup style. This option defines
2485 C<\DIFadd> and C<\DIFdel> commands.
2486 Available styles:
2487
2488 C<UNDERLINE CTRADITIONAL TRADITIONAL CFONT FONTSTRIKE INVISIBLE
2489 CHANGEBAR CCHANGEBAR CULINECHBAR CFONTCHBAR>
2490
2491 [ Default: C<UNDERLINE> ]
2492
2493 =item B<--subtype=markstyle> or
2494 B<-s markstyle>
2495
2496 Add code to preamble for selected style for bracketing
2497 commands (e.g. to mark changes in  margin). This option defines
2498 C<\DIFaddbegin>, C<\DIFaddend>, C<\DIFdelbegin> and C<\DIFdelend> commands.
2499 Available styles: C<SAFE MARGINAL DVIPSCOL>
2500
2501 [ Default: C<SAFE> ]
2502
2503 =item B<--floattype=markstyle> or
2504 B<-f markstyle>
2505
2506 Add code to preamble for selected style which
2507 replace standard marking and markup commands within floats
2508 (e.g., marginal remarks cause an error within floats
2509 so marginal marking can be disabled thus). This option defines all
2510 C<\DIF...FL> commands.
2511 Available styles: C<FLOATSAFE TRADITIONALSAFE IDENTICAL>
2512
2513 [ Default: C<FLOATSAFE> ]
2514
2515 =item B<--encoding=enc> or
2516 B<-e enc>
2517
2518 Specify encoding of old.tex and new.tex. Typical encodings are
2519 C<ascii>, C<utf8>, C<latin1>, C<latin9>.  A list of available encodings can be
2520 obtained by executing
2521
2522 C<perl -MEncode -e 'print join ("\n",Encode->encodings( ":all" )) ;' >
2523
2524 [Default encoding is utf8 unless the first few lines of the preamble contain
2525 an invocation C<\usepackage[..]{inputenc}> in which case the
2526 encoding chosen by this command is assumed. Note that ASCII (standard
2527 latex) is a subset of utf8]
2528
2529 =item B<--preamble=file> or
2530 B<-p file>
2531
2532 Insert file at end of preamble instead of generating
2533 preamble.  The preamble must define the following commands
2534 C<\DIFaddbegin, \DIFaddend, \DIFadd{..},
2535 \DIFdelbegin,\DIFdelend,\DIFdel{..},>
2536 and varieties for use within floats
2537 C<\DIFaddbeginFL, \DIFaddendFL, \DIFaddFL{..},
2538 \DIFdelbeginFL, \DIFdelendFL, \DIFdelFL{..}>
2539 (If this option is set B<-t>, B<-s>, and B<-f> options
2540 are ignored.)
2541
2542 =item B<--packages=pkg1,pkg2,..>
2543
2544 Tell latexdiff that .tex file is processed with the packages in list
2545 loaded.  This is normally not necessary if the .tex file includes the
2546 preamble, as the preamble is automatically scanned for C<\usepackage> commands.
2547 Use of the B<--packages> option disables automatic scanning, so if for any
2548 reason package specific parsing needs to be switched off, use B<--packages=none>.
2549 The following packages trigger special behaviour:
2550
2551 =over 8
2552
2553 =item C<amsmath>
2554
2555 Configuration variable amsmath is set to C<align*> (Default: C<eqnarray*>)
2556
2557 =item C<endfloat>
2558
2559 Ensure that C<\begin{figure}> and C<\end{figure}> always appear by themselves on a line.
2560
2561 =item C<hyperref>
2562
2563 Change name of C<\DIFadd> and C<\DIFdel> commands to C<\DIFaddtex> and C<\DIFdeltex> and
2564 define new C<\DIFadd> and C<\DIFdel> commands, which provide a wrapper for these commands,
2565 using them for the text but not for the link defining command (where any markup would cause
2566 errors).
2567
2568 =back
2569
2570 [ Default: scan the preamble for C<\usepackage> commands to determine
2571   loaded packages.]
2572
2573
2574
2575 =item B<--show-preamble>
2576
2577 Print generated or included preamble commands to stdout.
2578
2579 =back
2580
2581 =head2 Configuration
2582
2583 =over 4
2584
2585 =item B<--exclude-safecmd=exclude-file> or
2586 B<-A exclude-file> or  B<--exclude-safecmd="cmd1,cmd2,...">
2587
2588 =item B<--replace-safecmd=replace-file>
2589
2590 =item B<--append-safecmd=append-file> or
2591 B<-a append-file> or B<--append-safecmd="cmd1,cmd2,...">
2592
2593 Exclude from, replace or append to the list of regular expressions (RegEx)
2594 matching commands which are safe to use within the
2595 scope of a C<\DIFadd> or C<\DIFdel> command.  The file must contain
2596 one Perl-RegEx per line (Comment lines beginning with # or % are
2597 ignored).  Note that the RegEx needs to match the whole of
2598 the token, i.e., /^regex$/ is implied and that the initial
2599 "\" of the command is not included.
2600 The B<--exclude-safecmd> and B<--append-safecmd> options can be combined with the B<--replace-safecmd>
2601 option and can be used repeatedly to add cumulatively to the lists.
2602  B<--exclude-safecmd>
2603 and B<--append-safecmd> can also take a comma separated list as input. If a
2604 comma for one of the regex is required, escape it thus "\,". In most cases it
2605 will be necessary to protect the comma-separated list from the shell by putting
2606 it in quotation marks.
2607
2608 =item B<--exclude-textcmd=exclude-file> or
2609 B<-X exclude-file> or B<--exclude-textcmd="cmd1,cmd2,...">
2610
2611 =item B<--replace-textcmd=replace-file>
2612
2613 =item B<--append-textcmd=append-file> or
2614 B<-x append-file> or B<--append-textcmd="cmd1,cmd2,...">
2615
2616 Exclude from, replace or append to the list of regular expressions
2617 matching commands whose last argument is text.  See
2618 entry for B<--exclude-safecmd> directly above for further details.
2619
2620
2621 =item B<--replace-context1cmd=replace-file>
2622
2623 =item B<--append-context1cmd=append-file> or
2624 B<--append-context1cmd="cmd1,cmd2,...">
2625
2626 Replace or append to the list of regex matching commands
2627 whose last argument is text but which require a particular
2628 context to work, e.g. \caption will only work within a figure
2629 or table.  These commands behave like text commands, except when
2630 they occur in a deleted section, when they are disabled, but their
2631 argument is shown as deleted text.
2632
2633 =item B<--replace-context2cmd=replace-file>
2634
2635 =item B<--append-context2cmd=append-file> or
2636 B<--append-context2cmd="cmd1,cmd2,...">
2637
2638 As corresponding commands for context1.  The only difference is that
2639 context2 commands are completely disabled in deleted sections, including
2640 their arguments.
2641
2642
2643 =item B<--config var1=val1,var2=val2,...> or B<-c var1=val1,..>
2644
2645 =item B<-c configfile>
2646
2647 Set configuration variables.  The option can be repeated to set different
2648 variables (as an alternative to the comma-separated list).
2649 Available variables (see below for further explanations):
2650
2651 C<MINWORDSBLOCK> (integer)
2652
2653 C<FLOATENV> (RegEx)
2654
2655 C<PICTUREENV> (RegEx)
2656
2657 C<MATHENV> (RegEx)
2658
2659 C<MATHREPL> (String)
2660
2661 C<MATHARRENV> (RegEx)
2662
2663 C<MATHARRREPL> (String)
2664
2665 C<ARRENV> (RegEx)
2666
2667 C<COUNTERCMD> (RegEx)
2668
2669 =item B<--show-safecmd>
2670
2671 Print list of RegEx matching and excluding safe commands.
2672
2673 =item B<--show-textcmd>
2674
2675 Print list of RegEx matching and excluding commands with text argument.
2676
2677 =item B<--show-config>
2678
2679 Show values of configuration variables.
2680
2681 =item B<--show-all>
2682
2683 Combine all --show commands.
2684
2685 NB For all --show commands, no C<old.tex> or C<new.tex> file needs to be specified, and no
2686 differencing takes place.
2687
2688
2689 =back
2690
2691 =head2 Miscellaneous
2692
2693 =over 4
2694
2695 =item B<--verbose> or B<-V>
2696
2697 Output various status information to stderr during processing.
2698 Default is to work silently.
2699
2700 =item B<--ignore-warnings>
2701
2702 Suppress warnings about inconsistencies in length between input and
2703 parsed strings and missing characters.  These warning messages are
2704 often related to non-standard latex or latex constructions with a
2705 syntax unknown to C<latexdiff> but the resulting difference argument
2706 is often fully functional anyway, particularly if the non-standard
2707 latex only occurs in parts of the text which have not changed.
2708
2709 =item B<--allow-spaces>
2710
2711 Allow spaces between bracketed or braced arguments to commands.  Note
2712 that this option might have undesirable side effects (unrelated scope
2713 might get lumped with preceding commands) so should only be used if the
2714 default produces erroneous results.  (Default requires arguments to
2715 directly follow each other without intervening spaces).
2716
2717 =item B<--flatten>
2718
2719 Replace C<\input> and C<\include> commands within body by the content
2720 of the files in their argument.  If C<\includeonly> is present in the
2721 preamble, only those files are expanded into the document. However,
2722 no recursion is done, i.e. C<\input> and C<\include> commands within
2723 included sections are not expanded.  The included files are assumed to
2724  be located in the same directories as the old and new master files,
2725 respectively, making it possible to organise files into old and new directories.
2726
2727 Use of this option is not recommended,
2728 primarily because the processing time for large documents is prohibitive, and
2729 the resulting difference document no longer reflects the structure of the
2730 input documents.
2731
2732 =item B<--help> or
2733 B<-h>
2734
2735 Show help text
2736
2737 =item B<--version>
2738
2739 Show version number
2740
2741 =back
2742
2743 =head2 Predefined styles
2744
2745 =head2 Major types
2746
2747 The major type determines the markup of plain text and some selected latex commands outside floats by defining the markup commands C<\DIFadd{...}> and C<\DIFdel{...}> .
2748
2749 =over 10
2750
2751 =item C<UNDERLINE>
2752
2753 Added text is wavy-underlined and blue, discarded text is struck out and red
2754 (Requires color and ulem packages).  Overstriking does not work in displayed math equations such that deleted parts of equation are underlined, not struck out (this is a shortcoming inherent to the ulem package).
2755
2756 =item C<CTRADITIONAL>
2757
2758 Added text is blue and set in sans-serif, and a red footnote is created for each discarded
2759 piece of text. (Requires color package)
2760
2761 =item C<TRADITIONAL>
2762
2763 Like C<CTRADITIONAL> but without the use of color.
2764
2765 =item C<CFONT>
2766
2767 Added text is blue and set in sans-serif, and discarded text is red and very small size.
2768
2769 =item C<FONTSTRIKE>
2770
2771 Added text is set in sans-serif, discarded text small and struck out
2772
2773 =item C<CCHANGEBAR>
2774
2775 Added text is blue, and discarded text is red.  Additionally, the changed text is marked with a bar in the margin (Requires color and changebar packages).
2776
2777 =item C<CFONTCHBAR>
2778
2779 Like C<CFONT> but with additional changebars (Requires color and changebar packages).
2780
2781 =item C<CULINECHBAR>
2782
2783 Like C<UNDERLINE> but with additional changebars (Requires color, ulem and changebar packages).
2784
2785 =item C<CHANGEBAR>
2786
2787 No mark up of text, but mark margins with changebars (Requires changebar package).
2788
2789 =item C<INVISIBLE>
2790
2791 No visible markup (but generic markup commands will still be inserted).
2792
2793 =back
2794
2795 =head2 Subtypes
2796
2797 The subtype defines the commands that are inserted at the begin and end of added or discarded blocks, irrespectively of whether these blocks contain text or commands (Defined commands: C<\DIFaddbegin, \DIFaddend, \DIFdelbegin, \DIFdelend>)
2798
2799 =over 10
2800
2801 =item C<SAFE>
2802
2803 No additional markup (Recommended choice)
2804
2805 =item C<MARGIN>
2806
2807 Mark beginning and end of changed blocks with symbols in the margin nearby (using
2808 the standard C<\marginpar> command - note that this sometimes moves somewhat
2809 from the intended position).
2810
2811 =item C<DVIPSCOL>
2812
2813 An alternative way of marking added passages in blue, and deleted ones in red. Note
2814 that C<DVIPSCOL> only works with the dvips converter, e.g. not pdflatex.
2815 (it is recommended to use instead the main types to effect colored markup,
2816 although in some cases coloring with dvipscol can be more complete).
2817
2818 =back
2819
2820 =head2 Float Types
2821
2822 Some of the markup used in the main text might cause problems when used within
2823 floats (e.g. figures or tables).  For this reason alternative versions of all
2824 markup commands are used within floats. The float type defines these alternative commands.
2825
2826 =over 10
2827
2828 =item C<FLOATSAFE>
2829
2830 Use identical markup for text as in the main body, but set all commands marking the begin and end of changed blocks to null-commands.  You have to choose this float type if your subtype is C<MARGIN> as C<\marginpar> does not work properly within floats.
2831
2832 =item C<TRADITIONALSAFE>
2833
2834 Mark additions the same way as in the main text.  Deleted environments are marked by angular brackets \[ and \] and the deleted text is set in scriptscript size. This float type should always be used with the C<TRADITIONAL> and  C<CTRADITIONAL> markup types as the \footnote command does not work properly in floating environments.
2835
2836 =item C<IDENTICAL>
2837
2838 Make no difference between the main text and floats.
2839
2840 =back
2841
2842
2843 =head2 Configuration Variables
2844
2845 =over 10
2846
2847 =item C<MINWORDSBLOCK>
2848
2849 Minimum number of tokens required to form an independent block. This value is
2850 used in the algorithm to detect changes of complete blocks by merging identical text parts of less than C<MINWORDSBLOCK> to the preceding added and discarded parts.
2851
2852 [ Default: 3 ]
2853
2854 =item C<FLOATENV>
2855
2856 Environments whose name matches the regular expression in C<FLOATENV> are
2857 considered floats.  Within these environments, the I<latexdiff> markup commands
2858 are replaced by their FL varieties.
2859
2860 [ Default: S<C<(?:figure|table|plate)[\w\d*@]*> >]
2861
2862 =item C<PICTUREENV>
2863
2864 Within environments whose name matches the regular expression in C<PICTUREENV>
2865 all latexdiff markup is removed (in pathologic cases this might lead to
2866  inconsistent markup but this situation should be rare).
2867
2868 [ Default: S<C<(?:picture|DIFnomarkup)[\w\d*@]*> >]
2869
2870 =item C<MATHENV>,C<MATHREPL>
2871
2872 If both \begin and \end for a math environment (environment name matching C<MATHENV>
2873 or \[ and \])
2874 are within the same deleted block, they are replaced by \begin and \end commands for C<MATHREPL>
2875 rather than being commented out.
2876
2877 [ Default: C<MATHENV>=S<C<(?:displaymath|equation)> >, C<MATHREPL>=S<C<displaymath> >]
2878
2879 =item C<MATHARRENV>,C<MATHARRREPL>
2880
2881 as C<MATHENV>,C<MATHREPL> but for equation arrays
2882
2883 [ Default: C<MATHARRENV>=S<C<eqnarray\*?> >, C<MATHARRREPL>=S<C<eqnarray> >]
2884
2885 =item C<ARRENV>
2886
2887 If a match to C<ARRENV> is found within an inline math environment within a deleted or added block, then the inlined math
2888 is surrounded by C<\mbox{>...C<}>.  This is necessary as underlining does not work within inlined array environments.
2889
2890 [ Default: C<ARRENV>=S<C<(?:array|[pbvBV]matrix)> >]
2891
2892 =item C<COUNTERCMD>
2893
2894 If a command in a deleted block which is also in the textcmd list matches C<COUNTERCMD> then an
2895 additional command C<\addtocounter{>F<cntcmd>C<}{-1}>, where F<cntcmd> is the matching command, is appended in the diff file such that the numbering in the diff file remains synchronized with the
2896 numbering in the new file.
2897
2898 [ Default: C<COUNTERCMD>=C<(?:footnote|part|section|subsection> ...
2899
2900 C<|subsubsection|paragraph|subparagraph)>  ]
2901
2902 =back
2903
2904 =head1 BUGS
2905
2906 UTF-8 support requires a relatively new version of perl (5.8.0 is sufficient,
2907 but versions from 5.6.2 onwards should work, too).
2908
2909 The B<--allow-spaces> option is not implemented entirely consistently. It breaks
2910 the rule that the number and type of white space do not matter, as
2911 different numbers of inter-argument spaces are treated as significant.
2912
2913 Please send bug reports
2914 to I<tilmann@esc.cam.ac.uk>.  Include the serial number of I<latexdiff>
2915 (from comments at the top of the source or use B<--version>).  If you come across latex
2916 files that are error-free and conform to the specifications set out
2917 above, and whose differencing still does not result in error-free
2918 latex, please send me those files, ideally edited to only contain the
2919 offending passage as long as that still reproduces the problem.
2920
2921 =head1 SEE ALSO
2922
2923 L<latexrevise>
2924
2925 =head1 PORTABILITY
2926
2927 I<latexdiff> does not make use of external commands and thus should run
2928 on any platform  supporting Perl 5.6 or higher.  If files with encodings
2929 other than ASCII or UTF-8 are processed, Perl 5.8 or higher is required.
2930
2931 The standard version of I<latexdiff> requires installation of the Perl package
2932 C<Algorithm::Diff> (available from I<www.cpan.org> -
2933 I<http://search.cpan.org/~nedkonz/Algorithm-Diff-1.15>) but a stand-alone
2934 version, I<latexdiff-so>, which has this package inlined, is available, too.
2935 I<latexdiff-fast> requires the I<diff> command to be present.
2936
2937 =head1 AUTHOR
2938
2939 Copyright (C) 2004-2007 Frederik Tilmann
2940
2941 This program is free software; you can redistribute it and/or modify
2942 it under the terms of the GNU General Public License Version 2 as published by the Free Software Foundation.
2943
2944 Contributors of fixes and additions: V. Kuhlmann, J. Paisley, N. Becker, T. Doerges, K. Huebner
2945 Thanks to everyone who sent in bug reports.
2946
2947 =cut
2948
2949 __END__
2950 %%BEGIN SAFE COMMANDS
2951 % Regex matching commands which can safely be in the
2952 % argument of a \DIFadd or \DIFdel command (leave out the \)
2953 arabic
2954 dashbox
2955 emph
2956 fbox
2957 framebox
2958 hspace
2959 math.*
2960 makebox
2961 mbox
2962 pageref
2963 ref
2964 symbol
2965 raisebox
2966 rule
2967 text.*
2968 shortstack
2969 usebox
2970 dag
2971 ddag
2972 copyright
2973 pounds
2974 S
2975 P
2976 oe
2977 OE
2978 ae
2979 AE
2980 aa
2981 AA
2982 o
2983 O
2984 l
2985 L
2986 frac
2987 ss
2988 sqrt
2989 ldots
2990 cdots
2991 vdots
2992 ddots
2993 alpha
2994 beta
2995 gamma
2996 delta
2997 epsilon
2998 varepsilon
2999 zeta
3000 eta
3001 theta
3002 vartheta
3003 iota
3004 kappa
3005 lambda
3006 mu
3007 nu
3008 xi
3009 pi
3010 varpi
3011 rho
3012 varrho
3013 sigma
3014 varsigma
3015 tau
3016 upsilon
3017 phi
3018 varphi
3019 chi
3020 psi
3021 omega
3022 Gamma
3023 Delta
3024 Theta
3025 Lambda
3026 Xi
3027 Pi
3028 Sigma
3029 Upsilon
3030 Phi
3031 Psi
3032 Omega
3033 pm
3034 mp
3035 times
3036 div
3037 ast
3038 star
3039 circ
3040 bullet
3041 cdot
3042 cap
3043 cup
3044 uplus
3045 sqcap
3046 vee
3047 wedge
3048 setminus
3049 wr
3050 diamond
3051 (?:big)?triangle.*
3052 lhd
3053 rhd
3054 unlhd
3055 unrhd
3056 oplus
3057 ominus
3058 otimes
3059 oslash
3060 odot
3061 bigcirc
3062 d?dagger
3063 amalg
3064 leq
3065 prec
3066 preceq
3067 ll
3068 (?:sq)?su[bp]set(?:eq)?
3069 in
3070 vdash
3071 geq
3072 succ(?:eq)?
3073 gg
3074 ni
3075 dashv
3076 equiv
3077 sim(?:eq)?
3078 asymp
3079 approx
3080 cong
3081 neq
3082 doteq
3083 propto
3084 models
3085 perp
3086 mid
3087 parallel
3088 bowtie
3089 Join
3090 smile
3091 frown
3092 .*arrow
3093 (?:long)?mapsto
3094 .*harpoon.*
3095 leadsto
3096 aleph
3097 hbar
3098 imath
3099 jmath
3100 ell
3101 wp
3102 Re
3103 Im
3104 mho
3105 prime
3106 emptyset
3107 nabla
3108 surd
3109 top
3110 bot
3111 angle
3112 forall
3113 exists
3114 neg
3115 flat
3116 natural
3117 sharp
3118 backslash
3119 partial
3120 infty
3121 Box
3122 Diamond
3123 triangle
3124 clubsuit
3125 diamondsuit
3126 heartsuit
3127 spadesuit
3128 sum
3129 prod
3130 coprod
3131 int
3132 oint
3133 big(?:sq)?c[au]p
3134 bigvee
3135 bigwedge
3136 bigodot
3137 bigotimes
3138 bigoplus
3139 biguplus
3140 (?:arc)?(?:cos|sin|tan|cot)h?
3141 csc
3142 arg
3143 deg
3144 det
3145 dim
3146 exp
3147 gcd
3148 hom
3149 inf
3150 ker
3151 lg
3152 lim
3153 liminf
3154 limsup
3155 ln
3156 log
3157 max
3158 min
3159 Pr
3160 sec
3161 sup
3162 (SUPER|SUB)SCRIPTNB
3163 (SUPER|SUB)SCRIPT
3164 %%END SAFE COMMANDS
3165
3166 %%BEGIN TEXT COMMANDS
3167 % Regex matching commands with a text argument (leave out the \)
3168 addcontents.*
3169 cc
3170 closing
3171 chapter
3172 dashbox
3173 emph
3174 encl
3175 fbox
3176 framebox
3177 footnote
3178 footnotetext
3179 framebox
3180 part
3181 (sub){0,2}section\*?
3182 (sub)?paragraph\*?
3183 makebox
3184 mbox
3185 opening
3186 parbox
3187 raisebox
3188 savebox
3189 sbox
3190 shortstack
3191 signature
3192 text.*
3193 value
3194 underline
3195 sqrt
3196 (SUPER|SUB)SCRIPT
3197 %%END TEXT COMMANDS
3198
3199 %%BEGIN CONTEXT1 COMMANDS
3200 % Regex matching commands with a text argument (leave out the \), which will fail out of context, but whose argument should be printed as plain text
3201 caption
3202 %%END CONTEXT1 COMMANDS
3203
3204 %%BEGIN CONTEXT2 COMMANDS
3205 % Regex matching commands with a text argument (leave out the \), which will fail out of context, but whose argument should be printed as plain text
3206 title
3207 author
3208 %%END CONTEXT2 COMMANDS
3209
3210
3211 %% TYPES (Commands for highlighting changed blocks)
3212
3213 %DIF UNDERLINE PREAMBLE
3214 \RequirePackage[normalem]{ulem}
3215 \RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
3216 \providecommand{\DIFadd}[1]{{\protect\color{blue}\uwave{#1}}}
3217 \providecommand{\DIFdel}[1]{{\protect\color{red}\sout{#1}}}
3218 %DIF END UNDERLINE PREAMBLE
3219
3220 %DIF CTRADITIONAL PREAMBLE
3221 \RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
3222 \RequirePackage[stable]{footmisc}
3223 \providecommand{\DIFadd}[1]{{\protect\color{blue} \sf #1}}
3224 \providecommand{\DIFdel}[1]{{\protect\color{red} [..\footnote{removed: #1} ]}}
3225 %DIF END CTRADITIONAL PREAMBLE
3226
3227 %DIF TRADITIONAL PREAMBLE
3228 \RequirePackage[stable]{footmisc}
3229 \providecommand{\DIFadd}[1]{{\sf #1}}
3230 \providecommand{\DIFdel}[1]{{[..\footnote{removed: #1} ]}}
3231 %DIF END TRADITIONAL PREAMBLE
3232
3233 %DIF CFONT PREAMBLE
3234 \RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
3235 \providecommand{\DIFadd}[1]{{\protect\color{blue} \sf #1}}
3236 \providecommand{\DIFdel}[1]{{\protect\color{red} \scriptsize #1}}
3237 %DIF END CFONT PREAMBLE
3238
3239 %DIF FONTSTRIKE PREAMBLE
3240 \RequirePackage[normalem]{ulem}
3241 \providecommand{\DIFadd}[1]{{\sf #1}}
3242 \providecommand{\DIFdel}[1]{{\footnotesize \sout{#1}}}
3243 %DIF END FONTSTRIKE PREAMBLE
3244
3245 %DIF CCHANGEBAR PREAMBLE
3246 \RequirePackage[dvips]{changebar}
3247 \RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
3248 \providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}#1}\protect\cbend}
3249 \providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}#1}\protect\cbdelete}
3250 %DIF END CCHANGEBAR PREAMBLE
3251
3252 %DIF CFONTCHBAR PREAMBLE
3253 \RequirePackage[dvips]{changebar}
3254 \RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1}
3255 \providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}\sf #1}\protect\cbend}
3256 \providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}\scriptsize #1}\protect\cbdelete}
3257 %DIF END CFONTCHBAR PREAMBLE
3258
3259 %DIF CULINECHBAR PREAMBLE
3260 \RequirePackage[normalem]{ulem}
3261 \RequirePackage[dvips]{changebar}
3262 \RequirePackage{color}
3263 \providecommand{\DIFadd}[1]{\protect\cbstart{\protect\color{blue}\uwave{#1}}\protect\cbend}
3264 \providecommand{\DIFdel}[1]{\protect\cbdelete{\protect\color{red}\sout{#1}}\protect\cbdelete}
3265 %DIF END CULINECHBAR PREAMBLE
3266
3267 %DIF CHANGEBAR PREAMBLE
3268 \RequirePackage[dvips]{changebar}
3269 \providecommand{\DIFadd}[1]{\protect\cbstart{#1}\protect\cbend}
3270 \providecommand{\DIFdel}[1]{\protect\cbdelete}
3271 %DIF END CHANGEBAR PREAMBLE
3272
3273 %DIF INVISIBLE PREAMBLE
3274 \providecommand{\DIFadd}[1]{#1}
3275 \providecommand{\DIFdel}[1]{}
3276 %DIF END INVISIBLE PREAMBLE
3277
3278
3279 %% SUBTYPES (Markers for beginning and end of changed blocks)
3280
3281 %DIF SAFE PREAMBLE
3282 \providecommand{\DIFaddbegin}{}
3283 \providecommand{\DIFaddend}{}
3284 \providecommand{\DIFdelbegin}{}
3285 \providecommand{\DIFdelend}{}
3286 %DIF END SAFE PREAMBLE
3287
3288 %DIF MARGIN PREAMBLE
3289 \providecommand{\DIFaddbegin}{\protect\marginpar{a[}}
3290 \providecommand{\DIFaddend}{\protect\marginpar{]}}
3291 \providecommand{\DIFdelbegin}{\protect\marginpar{d[}}
3292 \providecommand{\DIFdelend}{\protect\marginpar{]}}
3293 %DIF END MARGIN PREAMBLE
3294
3295 %DIF DVIPSCOL PREAMBLE
3296 %Note: only works with dvips converter
3297 \RequirePackage{color}
3298 \RequirePackage{dvipscol}
3299 \providecommand{\DIFaddbegin}{\protect\nogroupcolor{blue}}
3300 \providecommand{\DIFaddend}{\protect\nogroupcolor{black}}
3301 \providecommand{\DIFdelbegin}{\protect\nogroupcolor{red}}
3302 \providecommand{\DIFdelend}{\protect\nogroupcolor{black}}
3303 %DIF END DVIPSCOL PREAMBLE
3304
3305
3306 %% FLOAT TYPES
3307
3308 %DIF FLOATSAFE PREAMBLE
3309 \providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
3310 \providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
3311 \providecommand{\DIFaddbeginFL}{}
3312 \providecommand{\DIFaddendFL}{}
3313 \providecommand{\DIFdelbeginFL}{}
3314 \providecommand{\DIFdelendFL}{}
3315 %DIF END FLOATSAFE PREAMBLE
3316
3317 %DIF IDENTICAL PREAMBLE
3318 \providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
3319 \providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
3320 \providecommand{\DIFaddbeginFL}{\DIFaddbegin}
3321 \providecommand{\DIFaddendFL}{\DIFaddend}
3322 \providecommand{\DIFdelbeginFL}{\DIFdelbegin}
3323 \providecommand{\DIFdelendFL}{\DIFdelend}
3324 %DIF END IDENTICAL PREAMBLE
3325
3326 %DIF TRADITIONALSAFE PREAMBLE
3327 % providecommand color to make this work for TRADITIONAL and CTRADITIONAL
3328 \providecommand{\color}[1]{}
3329 \providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
3330 \providecommand{\DIFdelFL}[1]{{\protect\color{red}[..{\scriptsize {removed: #1}} ]}}
3331 \providecommand{\DIFaddbeginFL}{}
3332 \providecommand{\DIFaddendFL}{}
3333 \providecommand{\DIFdelbeginFL}{}
3334 \providecommand{\DIFdelendFL}{}
3335 %DIF END TRADITIONALSAFE PREAMBLE
3336
3337 %% SPECIAL PACKAGE PREAMBLE COMMANDS
3338
3339 % Standard \DIFadd and \DIFdel are redefined as \DIFaddtex and \DIFdeltex
3340 % when hyperref package is included.
3341 %DIF HYPERREF PREAMBLE
3342 \providecommand{\DIFadd}[1]{\texorpdfstring{\DIFaddtex{#1}}{#1}}
3343 \providecommand{\DIFdel}[1]{\texorpdfstring{\DIFdeltex{#1}}{}}
3344 %DIF END HYPERREF PREAMBLE