1package VCS::Lite::Delta; 2 3use strict; 4use warnings; 5our $VERSION = '0.12'; 6 7#---------------------------------------------------------------------------- 8 9=head1 NAME 10 11VCS::Lite::Delta - VCS::Lite differences 12 13=head1 SYNOPSIS 14 15 use VCS::Lite; 16 17 # diff 18 19 my $lit = VCS::Lite->new('/home/me/foo1.txt'); 20 my $lit2 = VCS::Lite->new('/home/me/foo2.txt'); 21 my $difftxt = $lit->delta($lit2)->diff; 22 print OUTFILE $difftxt; 23 24 # patch 25 26 my $delt = VCS::Lite::Delta->new('/home/me/patch.diff'); 27 my $lit3 = $lit->patch($delt); 28 print OUTFILE $lit3->text; 29 30=head1 DESCRIPTION 31 32This module provides a Delta class for the differencing functionality of 33VCS::Lite 34 35=cut 36 37#---------------------------------------------------------------------------- 38 39############################################################################# 40#Library Modules # 41############################################################################# 42 43use Carp; 44 45#---------------------------------------------------------------------------- 46 47# Error handling, use package vars to control it for now. 48use vars qw($error_action $error_msg $error_line); 49 50#---------------------------------------------------------------------------- 51 52############################################################################# 53#Interface Methods # 54############################################################################# 55 56sub new { 57 my $class = shift; 58 my $src = shift; 59 60 # DWIM logic, based on $src parameter. 61 62 # Case 0: string. Use $id as file name, becomes case 2 63 if ( !ref $src ) { 64 open my $fh, $src or croak("failed to open '$src': $!"); 65 $src = $fh; # becomes case 2 below 66 } 67 my $atyp = ref $src; 68 69 # Case 1: $src is arrayref 70 return bless { 71 id1 => $_[0], 72 id2 => $_[1], 73 sep => $_[2], 74 diff => [@$src] 75 }, 76 $class 77 if $atyp eq 'ARRAY'; 78 79 my $sep = shift; 80 my %proto; 81 82 # Decode $sep as needed 83 84 if (ref($sep) eq 'HASH') { 85 %proto = %$sep; 86 $sep = $proto{in}; 87 delete $proto{in}; 88 } 89 90 $sep ||= $/; 91 local $/ = $sep if $sep; 92 $sep ||= ''; 93 my @diff; 94 95 # Case 2: $src is globref (file handle) - slurp file 96 if ( $atyp eq 'GLOB' ) { 97 @diff = <$src>; 98 } 99 100 # Case 3: $src is scalar ref (string) 101 elsif ( $atyp eq 'SCALAR' ) { 102 @diff = split /(?=$sep)/, $$src; 103 } 104 105 # Case otherwise is an error. 106 else { 107 croak "Invalid argument to VCS::Lite::Delta::new"; 108 } 109 110 # If we have reached this point, we have been passed something in a 111 # text/diff format. It could be diff or udiff format. 112 113 my ( $id1, $id2 ) = @_; 114 my @out; 115 116 if ( $diff[0] =~ /^---/ ) { # udiff format 117 my $state = 'inputdef'; 118 my ( $a_line, $a_count, @a_hunk, $b_line, $b_count, @b_hunk ); 119 for my $lin ( 0 .. $#diff ) { 120 local $_ = $diff[$lin]; 121 chomp if $proto{chomp}; 122 # inputdef = --- and +++ to identify the files being diffed 123 124 if ( $state eq 'inputdef' ) { 125 $id1 = $1 if /^--- # --- 126 \s 127 (\S+)/x; # file => $1 128 $id2 = $1 if /^\+{3} # +++ 129 \s 130 (\S+)/x; # file => $1 131 $state = 'patch' if /^\@\@/; 132 } 133 134 # patch expects @@ -a,b +c,d @@ 135 136 if ( $state eq 'patch' ) { 137 next unless /^\@\@ 138 \s+ 139 - 140 (\d+) # line of file 1 => $1 141 , 142 (\d+) # count of file 1 => $2 143 \s* 144 \+ 145 (\d+) # line of file 2 => $3 146 , 147 (\d+) # count of file 2 => $4 148 \s* 149 \@\@/x; 150 $a_line = $1 - 1; 151 $a_count = $2; 152 $b_line = $3 - 1; 153 $b_count = $4; 154 $state = 'detail'; 155 next; 156 } 157 158 # detail expects [-+ ]line of text 159 160 if ( $state eq 'detail' ) { 161 my $ind = substr $_, 0, 1, ''; 162 _error( $lin, 'Bad diff' ), return undef 163 unless $ind =~ /[ +\-i\\]/; 164 165 next if $ind eq '\\'; 166 167 #[- ]line, add to @a_hunk 168 if ( $ind ne '+' ) { 169 my $lead = '-'; 170 if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) { 171 $lead .= '/'; 172 s/$sep$//s; 173 } 174 push @a_hunk, [ $lead, $a_line++, $_ ]; 175 $a_count--; 176 _error( $lin, 'Too large diff' ), return undef 177 if $a_count < 0; 178 } 179 180 #[+ ]line, add to @b_hunk 181 if ( $ind ne '-' ) { 182 my $lead = '+'; 183 if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) { 184 $lead .= '/'; 185 s/$sep$//s; 186 } 187 push @b_hunk, [ $lead, $b_line++, $_ ]; 188 $b_count--; 189 _error( $lin, 'Too large diff' ), return undef 190 if $b_count < 0; 191 } 192 193 # are we there yet, daddy? 194 if ( !$a_count and !$b_count ) { 195 push @out, [ @a_hunk, @b_hunk ]; 196 @a_hunk = @b_hunk = (); 197 $state = 'patch'; 198 } 199 } 200 } # next line of patch 201 return bless { 202 id1 => $id1, 203 id2 => $id2, 204 sep => $sep, 205 diff => \@out, 206 %proto 207 }, $class; 208 } 209 210 # not a udiff mode patch, assume straight diff mode 211 212 my $state = 'patch'; 213 my ( $a_line, $a_count, @a_hunk, $b_line, $b_count, @b_hunk ); 214 for my $lin ( 0 .. $#diff ) { 215 local $_ = $diff[$lin]; 216 chomp if $proto{chomp}; 217 218 # patch expects ww,xx[acd]yy,zz style 219 220 if ( $state eq 'patch' ) { 221 next unless /^(\d+) # start line of file 1 => $1 222 (?:,(\d+))? # end line of file 1 => $2 223 ([acd]) # Add, change, delete => $3 224 (\d+) # start line of file 2 => $4 225 (?:,(\d+))? # end line of file 2 => $5 226 /x; 227 $a_line = $1 - 1; 228 $a_count = $2 ? ( $2 - $a_line ) : 1; 229 $b_line = $4 - 1; 230 $b_count = $5 ? ( $5 - $b_line ) : 1; 231 $a_count = 0 if $3 eq 'a'; 232 $b_count = 0 if $3 eq 'd'; 233 $state = 'detail'; 234 next; 235 } 236 237 # detail expects < lines --- > lines 238 239 if ( $state eq 'detail' ) { 240 next if /^---/; # ignore separator 241 my $ind = substr $_, 0, 2, ''; 242 _error( $lin, 'Bad diff' ), return undef 243 unless $ind =~ /[<>\\] /; 244 245 # < line goes to @a_hunk 246 if ( $ind eq '< ' ) { 247 my $lead = '-'; 248 if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) { 249 $lead .= '/'; 250 s/$sep$//s; 251 } 252 push @a_hunk, [ $lead, $a_line++, $_ ]; 253 $a_count--; 254 _error( $lin, 'Too large diff' ), return undef 255 if $a_count < 0; 256 } 257 258 # > line goes to @b_hunk 259 if ( $ind eq '> ' ) { 260 my $lead = '+'; 261 if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) { 262 $lead .= '/'; 263 s/$sep$//s; 264 } 265 push @b_hunk, [ $lead, $b_line++, $_ ]; 266 $b_count--; 267 _error( $lin, 'Too large diff' ), return undef 268 if $b_count < 0; 269 } 270 271 # are we there yet, daddy? 272 if ( !$a_count and !$b_count ) { 273 push @out, [ @a_hunk, @b_hunk ]; 274 @a_hunk = @b_hunk = (); 275 $state = 'patch'; 276 } 277 } 278 } 279 return bless { 280 id1 => $id1, 281 id2 => $id2, 282 sep => $sep, 283 diff => \@out, 284 %proto 285 }, $class; 286} 287 288sub _error { 289 ( $error_line, my $msg ) = @_; 290 291 $error_msg = "Line $error_line: $msg"; 292 293 goto &$error_action if ref($error_action) eq 'CODE'; 294 confess $error_msg if $error_action eq 'raise'; 295 296 print STDERR $error_msg, "\n" unless $error_action eq 'silent'; 297} 298 299sub _diff_hunk { 300 301 my $sep = shift; 302 my $r_line_offset = shift; 303 304 my @ins; 305 my ( $ins_firstline, $ins_lastline ) = ( 0, 0 ); 306 my @del; 307 my ( $del_firstline, $del_lastline ) = ( 0, 0 ); 308 my $op; 309 my $shortins = ''; 310 my $shortdel = ''; 311 312 # construct @ins and @del from hunk 313 314 for (@_) { 315 my ( $typ, $lno, $txt ) = @$_; 316 my $short = substr($typ, 1, 1, ''); 317 $lno++; 318 if ( $typ eq '+' ) { 319 push @ins, $txt; 320 $ins_firstline ||= $lno; 321 $ins_lastline = $lno; 322 $shortins = "\n\\ No newline at end of file\n" if $short; 323 } 324 else { 325 push @del, $txt; 326 $del_firstline ||= $lno; 327 $del_lastline = $lno; 328 $shortdel = "\n\\ No newline at end of file\n" if $short; 329 } 330 } 331 332 # Work out whether we are a, c or d 333 334 if ( !@del ) { 335 $op = 'a'; 336 $del_firstline = $ins_firstline - $$r_line_offset - 1; 337 } 338 elsif ( !@ins ) { 339 $op = 'd'; 340 $ins_firstline = $del_firstline + $$r_line_offset - 1; 341 } 342 else { 343 $op = 'c'; 344 } 345 346 $$r_line_offset += @ins - @del; 347 348 $ins_lastline ||= $ins_firstline; 349 $del_lastline ||= $del_firstline; 350 351 # Make the header line 352 353 my $outstr = 354 "$del_firstline,$del_lastline$op$ins_firstline,$ins_lastline\n"; 355 $outstr =~ s/(^|\D)(\d+),\2(?=\D|$)/$1$2/g; 356 357 # < deletions 358 for (@del) { 359 $outstr .= '< ' . $_ . $sep; 360 } 361 $outstr .= $shortdel; 362 363 # --- 364 $outstr .= "---\n" if @ins && @del; 365 366 # > insertions 367 for (@ins) { 368 $outstr .= '> ' . $_ . $sep; 369 } 370 $outstr .= $shortins; 371 372 $outstr; 373} 374 375sub diff { 376 my $self = shift; 377 my $sep = shift || $self->{sep} || ''; 378 379 my $off = 0; 380 381 join '', map { _diff_hunk( $sep, \$off, @$_ ) } @{ $self->{diff} }; 382} 383 384sub udiff { 385 my $self = shift; 386 my $sep = shift || $self->{sep} || ''; 387 388 my ( $in, $out, $diff ) = @{$self}{qw/id1 id2 diff/}; 389 390 # Header with file names 391 392 my @out = ( "--- $in \n", "+++ $out \n" ); 393 394 my $offset = 0; 395 396 for (@$diff) { 397 my @t1 = grep { $_->[0] =~ /^\-/ } @$_; 398 my @t2 = grep { $_->[0] =~ /^\+/ } @$_; 399 400 my $short1 = ''; 401 $short1 = "\n\\ No newline at end of file\n" 402 if grep { $_->[0] eq '-/' } @t1; 403 my $short2 = ''; 404 $short2 = "\n\\ No newline at end of file\n" 405 if grep { $_->[0] eq '+/' } @t2; 406 407 # Work out base line numbers in both files 408 409 my $base1 = @t1 ? $t1[0][1] : $t2[0][1] - $offset; 410 my $base2 = @t2 ? $t2[0][1] : $t1[0][1] + $offset; 411 $base1++; 412 $base2++; # Our lines were 0 based 413 $offset += @t2 - @t1; 414 my $count1 = @t1; 415 my $count2 = @t2; 416 417 # Header line 418 push @out, "@@ -$base1,$count1 +$base2,$count2 @@\n"; 419 420 # Use Algorithm::Diff::sdiff to munge out any lines in common inside 421 # the hunk 422 my @txt1 = map { $_->[2] } @t1; 423 my @txt2 = map { $_->[2] } @t2; 424 425 my @ad = Algorithm::Diff::sdiff( \@txt1, \@txt2 ); 426 my @defer; 427 428 # for each subhunk, we want all the file1 lines first, then all the file2 lines 429 430 for (@ad) { 431 my ( $ind, $txt1, $txt2 ) = @$_; 432 433 # we want to flush out the + lines when we run off the end of a 'c' section 434 435 ( push @out, @defer ), @defer = () unless $ind eq 'c'; 436 437 # unchanged lines, just wack 'em out 438 ( push @out, ' ' . $txt1 . $sep ), next if $ind eq 'u'; 439 440 # output original line (- line) 441 push @out, '-' . $txt1 . $sep unless $ind eq '+'; 442 443 # defer changed + lines 444 push @defer, '+' . $txt2 . $sep unless $ind eq '-'; 445 } 446 push @out, $short1; 447 448 # and flush at the end 449 push @out, @defer, $short2; 450 } 451 wantarray ? @out : join '', @out; 452} 453 454sub id { 455 my $self = shift; 456 457 if (@_) { 458 $self->{id1} = shift; 459 $self->{id2} = shift; 460 } 461 462 @{$self}{qw/id1 id2/}; 463} 464 465sub hunks { 466 my $self = shift; 467 468 @{ $self->{diff} }; 469} 470 4711; 472 473__END__ 474 475#---------------------------------------------------------------------------- 476 477=head1 API 478 479=head2 new 480 481The underlying object of VCS::Lite::Delta is an array of difference 482chunks (hunks) such as that returned by Algorithm::Diff. 483 484The constructor takes the following forms: 485 486 my $delt = VCS::Lite::Delta->new( '/my/file.diff',$sep); # File name 487 my $delt = VCS::Lite::Delta->new( \*FILE,$sep); # File handle 488 my $delt = VCS::Lite::Delta->new( \$string,$sep); # String as scalar ref 489 my $delt = VCS::Lite::Delta->new( \@foo, $id1, $id2) # Array ref 490 491$sep here is a regexp by which to split strings into tokens. 492The default is to use the natural perl mechanism of $/ (which is emulated 493when not reading from a file). The arrayref form is assuming an array of 494hunks such as the output from L<Algorithm::Diff::diff>. 495 496The other forms assume the input is the text form of a diff listing, 497either in diff format, or in unified format. The input is parsed, and errors 498are reported. 499 500=head2 diff 501 502 print OUTFILE $delt->diff 503 504This generates a standard diff format, for example: 505 5064c4 507< Now wherefore stopp'st thou me? 508--- 509> Now wherefore stoppest thou me? 510 511=head2 udiff 512 513 print OUTFILE $delt->udiff 514 515This generates a unified diff (like diff -u) similar to the form in which 516patches are submitted. 517 518=head2 id 519 520 my ($id1,$id2) = $delt->id; 521 $delt2->id('foo.pl@@1','foo.pl@@3') 522 523The I<id> method allows get and set of the names associated with the two 524elements being diffed. The id is set for delta objects returned by 525VCS::Lite->diff, to the element IDs of the VCS::Lite objects being diffed. 526 527Diff format omits the file names, hence the IDs will not be populated by 528new. This is not the case with diff -u format, which includes the file 529names which are passed in and available as IDs. 530 531=head2 hunks 532 533 my @hunklist = $delt->hunks 534 535A hunk is a technical term for a section of input containing a difference. 536Each hunk is an arrayref, containing the block of lines. Each line is 537itself an arrayref, for example: 538 539 [ 540 [ '+', 9, 'use Acme::Foo;'], 541 [ '-', 9, 'use Acme::Bar;'], 542 ] 543 544See the documentation on L<Algorithm::Diff> for more details of this structure. 545 546=head1 SEE ALSO 547 548L<Algorithm::Diff>. 549 550=head1 BUGS, PATCHES & FIXES 551 552There are no known bugs at the time of this release. However, if you spot a 553bug or are experiencing difficulties that are not explained within the POD 554documentation, please send an email to barbie@cpan.org or submit a bug to the 555RT system (see link below). However, it would help greatly if you are able to 556pinpoint problems or even supply a patch. 557 558http://rt.cpan.org/Public/Dist/Display.html?Name=VCS-Lite 559 560Fixes are dependent upon their severity and my availability. Should a fix not 561be forthcoming, please feel free to (politely) remind me. 562 563=head1 AUTHOR 564 565 Original Author: Ivor Williams (RIP) 2002-2009 566 Current Maintainer: Barbie <barbie@cpan.org> 2009-2015 567 568=head1 COPYRIGHT 569 570 Copyright (c) Ivor Williams, 2002-2006 571 Copyright (c) Barbie, 2009-2015 572 573=head1 LICENCE 574 575This distribution is free software; you can redistribute it and/or 576modify it under the Artistic Licence v2. 577 578=cut 579