1package VCS::Lite::Delta;
2
3use strict;
4use warnings;
5our $VERSION = '0.12';
6
7#----------------------------------------------------------------------------
8
9=head1 NAME
10
11VCS::Lite::Delta - VCS::Lite differences
12
13=head1 SYNOPSIS
14
15  use VCS::Lite;
16
17  # diff
18
19  my $lit = VCS::Lite->new('/home/me/foo1.txt');
20  my $lit2 = VCS::Lite->new('/home/me/foo2.txt');
21  my $difftxt = $lit->delta($lit2)->diff;
22  print OUTFILE $difftxt;
23
24  # patch
25
26  my $delt = VCS::Lite::Delta->new('/home/me/patch.diff');
27  my $lit3 = $lit->patch($delt);
28  print OUTFILE $lit3->text;
29
30=head1 DESCRIPTION
31
32This module provides a Delta class for the differencing functionality of
33VCS::Lite
34
35=cut
36
37#----------------------------------------------------------------------------
38
39#############################################################################
40#Library Modules															#
41#############################################################################
42
43use Carp;
44
45#----------------------------------------------------------------------------
46
47# Error handling, use package vars to control it for now.
48use vars qw($error_action $error_msg $error_line);
49
50#----------------------------------------------------------------------------
51
52#############################################################################
53#Interface Methods   														#
54#############################################################################
55
56sub new {
57    my $class = shift;
58    my $src   = shift;
59
60    # DWIM logic, based on $src parameter.
61
62    # Case 0: string. Use $id as file name, becomes case 2
63    if ( !ref $src ) {
64        open my $fh, $src or croak("failed to open '$src': $!");
65        $src = $fh;    # becomes case 2 below
66    }
67    my $atyp = ref $src;
68
69    # Case 1: $src is arrayref
70    return bless {
71        id1  => $_[0],
72        id2  => $_[1],
73        sep  => $_[2],
74        diff => [@$src]
75      },
76      $class
77      if $atyp eq 'ARRAY';
78
79    my $sep = shift;
80    my %proto;
81
82    # Decode $sep as needed
83
84	if (ref($sep) eq 'HASH') {
85	    %proto = %$sep;
86	    $sep = $proto{in};
87        delete $proto{in};
88	}
89
90    $sep ||= $/;
91    local $/ = $sep if $sep;
92    $sep ||= '';
93    my @diff;
94
95    # Case 2: $src is globref (file handle) - slurp file
96    if ( $atyp eq 'GLOB' ) {
97        @diff = <$src>;
98    }
99
100    # Case 3: $src is scalar ref (string)
101    elsif ( $atyp eq 'SCALAR' ) {
102        @diff = split /(?=$sep)/, $$src;
103    }
104
105    # Case otherwise is an error.
106    else {
107        croak "Invalid argument to VCS::Lite::Delta::new";
108    }
109
110    # If we have reached this point, we have been passed something in a
111    # text/diff format. It could be diff or udiff format.
112
113    my ( $id1, $id2 ) = @_;
114    my @out;
115
116    if ( $diff[0] =~ /^---/ ) {    # udiff format
117        my $state = 'inputdef';
118        my ( $a_line, $a_count, @a_hunk, $b_line, $b_count, @b_hunk );
119        for my $lin ( 0 .. $#diff ) {
120            local $_ = $diff[$lin];
121            chomp if $proto{chomp};
122            # inputdef = --- and +++ to identify the files being diffed
123
124            if ( $state eq 'inputdef' ) {
125                $id1 = $1 if /^---	# ---
126						\s
127						(\S+)/x;                     # file => $1
128                $id2 = $1 if /^\+{3}	# +++
129						\s
130						(\S+)/x;                     # file => $1
131                $state = 'patch' if /^\@\@/;
132            }
133
134            # patch expects @@ -a,b +c,d @@
135
136            if ( $state eq 'patch' ) {
137                next unless /^\@\@
138						\s+
139						-
140						(\d+)	# line of file 1 => $1
141						,
142						(\d+)	# count of file 1 => $2
143						\s*
144						\+
145						(\d+)	# line of file 2 => $3
146						,
147						(\d+)	# count of file 2 => $4
148						\s*
149						\@\@/x;
150                $a_line  = $1 - 1;
151                $a_count = $2;
152                $b_line  = $3 - 1;
153                $b_count = $4;
154                $state   = 'detail';
155                next;
156            }
157
158            # detail expects [-+ ]line of text
159
160            if ( $state eq 'detail' ) {
161                my $ind = substr $_, 0, 1, '';
162                _error( $lin, 'Bad diff' ), return undef
163                  unless $ind =~ /[ +\-i\\]/;
164
165                next if $ind eq '\\';
166
167                #[- ]line, add to @a_hunk
168                if ( $ind ne '+' ) {
169                    my $lead = '-';
170                    if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) {
171                        $lead .= '/';
172                        s/$sep$//s;
173                    }
174                    push @a_hunk, [ $lead, $a_line++, $_ ];
175                    $a_count--;
176                    _error( $lin, 'Too large diff' ), return undef
177                      if $a_count < 0;
178                }
179
180                #[+ ]line, add to @b_hunk
181                if ( $ind ne '-' ) {
182                    my $lead = '+';
183                    if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) {
184                        $lead .= '/';
185                        s/$sep$//s;
186                    }
187                    push @b_hunk, [ $lead, $b_line++, $_ ];
188                    $b_count--;
189                    _error( $lin, 'Too large diff' ), return undef
190                      if $b_count < 0;
191                }
192
193                # are we there yet, daddy?
194                if ( !$a_count and !$b_count ) {
195                    push @out, [ @a_hunk, @b_hunk ];
196                    @a_hunk = @b_hunk = ();
197                    $state = 'patch';
198                }
199            }
200        }    # next line of patch
201        return bless {
202            id1  => $id1,
203            id2  => $id2,
204            sep  => $sep,
205            diff => \@out,
206            %proto
207        }, $class;
208    }
209
210    # not a udiff mode patch, assume straight diff mode
211
212    my $state = 'patch';
213    my ( $a_line, $a_count, @a_hunk, $b_line, $b_count, @b_hunk );
214    for my $lin ( 0 .. $#diff ) {
215        local $_ = $diff[$lin];
216        chomp if $proto{chomp};
217
218        # patch expects ww,xx[acd]yy,zz style
219
220        if ( $state eq 'patch' ) {
221            next unless /^(\d+)	# start line of file 1 => $1
222				(?:,(\d+))?	# end line of file 1 => $2
223				([acd])		# Add, change, delete => $3
224				(\d+)		# start line of file 2 => $4
225				(?:,(\d+))?	# end line of file 2 => $5
226				/x;
227            $a_line  = $1 - 1;
228            $a_count = $2 ? ( $2 - $a_line ) : 1;
229            $b_line  = $4 - 1;
230            $b_count = $5 ? ( $5 - $b_line ) : 1;
231            $a_count = 0 if $3 eq 'a';
232            $b_count = 0 if $3 eq 'd';
233            $state   = 'detail';
234            next;
235        }
236
237        # detail expects < lines --- > lines
238
239        if ( $state eq 'detail' ) {
240            next if /^---/;    # ignore separator
241            my $ind = substr $_, 0, 2, '';
242            _error( $lin, 'Bad diff' ), return undef
243              unless $ind =~ /[<>\\] /;
244
245            # < line goes to @a_hunk
246            if ( $ind eq '< ' ) {
247                my $lead = '-';
248                if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) {
249                    $lead .= '/';
250                    s/$sep$//s;
251                }
252                push @a_hunk, [ $lead, $a_line++, $_ ];
253                $a_count--;
254                _error( $lin, 'Too large diff' ), return undef
255                  if $a_count < 0;
256            }
257
258            # > line goes to @b_hunk
259            if ( $ind eq '> ' ) {
260                my $lead = '+';
261                if (($lin < $#diff) && $diff[$lin+1] =~ /^\\/) {
262                    $lead .= '/';
263                    s/$sep$//s;
264                }
265                push @b_hunk, [ $lead, $b_line++, $_ ];
266                $b_count--;
267                _error( $lin, 'Too large diff' ), return undef
268                  if $b_count < 0;
269            }
270
271            # are we there yet, daddy?
272            if ( !$a_count and !$b_count ) {
273                push @out, [ @a_hunk, @b_hunk ];
274                @a_hunk = @b_hunk = ();
275                $state = 'patch';
276            }
277        }
278    }
279    return bless {
280        id1  => $id1,
281        id2  => $id2,
282        sep  => $sep,
283        diff => \@out,
284        %proto
285    }, $class;
286}
287
288sub _error {
289    ( $error_line, my $msg ) = @_;
290
291    $error_msg = "Line $error_line: $msg";
292
293    goto &$error_action if ref($error_action) eq 'CODE';
294    confess $error_msg  if $error_action      eq 'raise';
295
296    print STDERR $error_msg, "\n" unless $error_action eq 'silent';
297}
298
299sub _diff_hunk {
300
301    my $sep           = shift;
302    my $r_line_offset = shift;
303
304    my @ins;
305    my ( $ins_firstline, $ins_lastline ) = ( 0, 0 );
306    my @del;
307    my ( $del_firstline, $del_lastline ) = ( 0, 0 );
308    my $op;
309    my $shortins = '';
310    my $shortdel = '';
311
312    # construct @ins and @del from hunk
313
314    for (@_) {
315        my ( $typ, $lno, $txt ) = @$_;
316        my $short = substr($typ, 1, 1, '');
317        $lno++;
318        if ( $typ eq '+' ) {
319            push @ins, $txt;
320            $ins_firstline ||= $lno;
321            $ins_lastline = $lno;
322            $shortins = "\n\\ No newline at end of file\n" if $short;
323        }
324        else {
325            push @del, $txt;
326            $del_firstline ||= $lno;
327            $del_lastline = $lno;
328            $shortdel = "\n\\ No newline at end of file\n" if $short;
329        }
330    }
331
332    # Work out whether we are a, c or d
333
334    if ( !@del ) {
335        $op            = 'a';
336        $del_firstline = $ins_firstline - $$r_line_offset - 1;
337    }
338    elsif ( !@ins ) {
339        $op            = 'd';
340        $ins_firstline = $del_firstline + $$r_line_offset - 1;
341    }
342    else {
343        $op = 'c';
344    }
345
346    $$r_line_offset += @ins - @del;
347
348    $ins_lastline ||= $ins_firstline;
349    $del_lastline ||= $del_firstline;
350
351    # Make the header line
352
353    my $outstr =
354      "$del_firstline,$del_lastline$op$ins_firstline,$ins_lastline\n";
355    $outstr =~ s/(^|\D)(\d+),\2(?=\D|$)/$1$2/g;
356
357    # < deletions
358    for (@del) {
359        $outstr .= '< ' . $_ . $sep;
360    }
361    $outstr .= $shortdel;
362
363    # ---
364    $outstr .= "---\n" if @ins && @del;
365
366    # > insertions
367    for (@ins) {
368        $outstr .= '> ' . $_ . $sep;
369    }
370    $outstr .= $shortins;
371
372    $outstr;
373}
374
375sub diff {
376    my $self = shift;
377    my $sep  = shift || $self->{sep} || '';
378
379    my $off = 0;
380
381    join '', map { _diff_hunk( $sep, \$off, @$_ ) } @{ $self->{diff} };
382}
383
384sub udiff {
385    my $self = shift;
386    my $sep  = shift || $self->{sep} || '';
387
388    my ( $in, $out, $diff ) = @{$self}{qw/id1 id2 diff/};
389
390    # Header with file names
391
392    my @out = ( "--- $in \n", "+++ $out \n" );
393
394    my $offset = 0;
395
396    for (@$diff) {
397        my @t1 = grep { $_->[0] =~ /^\-/ } @$_;
398        my @t2 = grep { $_->[0] =~ /^\+/ } @$_;
399
400        my $short1 = '';
401        $short1 = "\n\\ No newline at end of file\n"
402            if grep { $_->[0] eq '-/' } @t1;
403        my $short2 = '';
404        $short2 = "\n\\ No newline at end of file\n"
405            if grep { $_->[0] eq '+/' } @t2;
406
407        # Work out base line numbers in both files
408
409        my $base1 = @t1 ? $t1[0][1] : $t2[0][1] - $offset;
410        my $base2 = @t2 ? $t2[0][1] : $t1[0][1] + $offset;
411        $base1++;
412        $base2++;    # Our lines were 0 based
413        $offset += @t2 - @t1;
414        my $count1 = @t1;
415        my $count2 = @t2;
416
417        # Header line
418        push @out, "@@ -$base1,$count1 +$base2,$count2 @@\n";
419
420        # Use Algorithm::Diff::sdiff to munge out any lines in common inside
421        # the hunk
422        my @txt1 = map { $_->[2] } @t1;
423        my @txt2 = map { $_->[2] } @t2;
424
425        my @ad = Algorithm::Diff::sdiff( \@txt1, \@txt2 );
426        my @defer;
427
428 # for each subhunk, we want all the file1 lines first, then all the file2 lines
429
430        for (@ad) {
431            my ( $ind, $txt1, $txt2 ) = @$_;
432
433     # we want to flush out the + lines when we run off the end of a 'c' section
434
435            ( push @out, @defer ), @defer = () unless $ind eq 'c';
436
437            # unchanged lines, just wack 'em out
438            ( push @out, ' ' . $txt1 . $sep ), next if $ind eq 'u';
439
440            # output original line (- line)
441            push @out, '-' . $txt1 . $sep unless $ind eq '+';
442
443            # defer changed + lines
444            push @defer, '+' . $txt2 . $sep unless $ind eq '-';
445        }
446        push @out, $short1;
447
448        # and flush at the end
449        push @out, @defer, $short2;
450    }
451    wantarray ? @out : join '', @out;
452}
453
454sub id {
455    my $self = shift;
456
457    if (@_) {
458        $self->{id1} = shift;
459        $self->{id2} = shift;
460    }
461
462    @{$self}{qw/id1 id2/};
463}
464
465sub hunks {
466    my $self = shift;
467
468    @{ $self->{diff} };
469}
470
4711;
472
473__END__
474
475#----------------------------------------------------------------------------
476
477=head1 API
478
479=head2 new
480
481The underlying object of VCS::Lite::Delta is an array of difference
482chunks (hunks) such as that returned by Algorithm::Diff.
483
484The constructor takes the following forms:
485
486  my $delt = VCS::Lite::Delta->new( '/my/file.diff',$sep); # File name
487  my $delt = VCS::Lite::Delta->new( \*FILE,$sep);	# File handle
488  my $delt = VCS::Lite::Delta->new( \$string,$sep); # String as scalar ref
489  my $delt = VCS::Lite::Delta->new( \@foo, $id1, $id2) # Array ref
490
491$sep here is a regexp by which to split strings into tokens.
492The default is to use the natural perl mechanism of $/ (which is emulated
493when not reading from a file). The arrayref form is assuming an array of
494hunks such as the output from L<Algorithm::Diff::diff>.
495
496The other forms assume the input is the text form of a diff listing,
497either in diff format, or in unified format. The input is parsed, and errors
498are reported.
499
500=head2 diff
501
502  print OUTFILE $delt->diff
503
504This generates a standard diff format, for example:
505
5064c4
507< Now wherefore stopp'st thou me?
508---
509> Now wherefore stoppest thou me?
510
511=head2 udiff
512
513  print OUTFILE $delt->udiff
514
515This generates a unified diff (like diff -u) similar to the form in which
516patches are submitted.
517
518=head2 id
519
520  my ($id1,$id2) = $delt->id;
521  $delt2->id('foo.pl@@1','foo.pl@@3')
522
523The I<id> method allows get and set of the names associated with the two
524elements being diffed. The id is set for delta objects returned by
525VCS::Lite->diff, to the element IDs of the VCS::Lite objects being diffed.
526
527Diff format omits the file names, hence the IDs will not be populated by
528new. This is not the case with diff -u format, which includes the file
529names which are passed in and available as IDs.
530
531=head2 hunks
532
533  my @hunklist = $delt->hunks
534
535A hunk is a technical term for a section of input containing a difference.
536Each hunk is an arrayref, containing the block of lines. Each line is
537itself an arrayref, for example:
538
539  [
540    [ '+', 9, 'use Acme::Foo;'],
541    [ '-', 9, 'use Acme::Bar;'],
542  ]
543
544See the documentation on L<Algorithm::Diff> for more details of this structure.
545
546=head1 SEE ALSO
547
548L<Algorithm::Diff>.
549
550=head1 BUGS, PATCHES & FIXES
551
552There are no known bugs at the time of this release. However, if you spot a
553bug or are experiencing difficulties that are not explained within the POD
554documentation, please send an email to barbie@cpan.org or submit a bug to the
555RT system (see link below). However, it would help greatly if you are able to
556pinpoint problems or even supply a patch.
557
558http://rt.cpan.org/Public/Dist/Display.html?Name=VCS-Lite
559
560Fixes are dependent upon their severity and my availability. Should a fix not
561be forthcoming, please feel free to (politely) remind me.
562
563=head1 AUTHOR
564
565  Original Author: Ivor Williams (RIP)          2002-2009
566  Current Maintainer: Barbie <barbie@cpan.org>  2009-2015
567
568=head1 COPYRIGHT
569
570  Copyright (c) Ivor Williams, 2002-2006
571  Copyright (c) Barbie,        2009-2015
572
573=head1 LICENCE
574
575This distribution is free software; you can redistribute it and/or
576modify it under the Artistic Licence v2.
577
578=cut
579