package Git::SVN::Fetcher;
use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs
            $_placeholder_filename @deleted_gpath %added_placeholder
            $repo_id/;
use strict;
use warnings $ENV{GIT_PERL_FATAL_WARNINGS} ? qw(FATAL all) : ();
use SVN::Delta;
use Carp qw/croak/;
use File::Basename qw/dirname/;
use Git qw/command command_oneline command_noisy command_output_pipe
           command_input_pipe command_close_pipe
           command_bidi_pipe command_close_bidi_pipe
           get_record/;
BEGIN {
	@ISA = qw(SVN::Delta::Editor);
}

# file baton members: path, mode_a, mode_b, pool, fh, blob, base

# Constructor.
#   $git_svn     - object providing {repo_id}, optionally {last_commit},
#                  and the tmp_index_do() method
#   $switch_path - optional path forwarded to _mark_empty_symlinks()
# Reads the per-remote ignore-paths / include-paths /
# preserve-empty-dirs / placeholder-filename / added-placeholder
# settings through "git config" and returns the blessed editor.
sub new {
	my ($class, $git_svn, $switch_path) = @_;
	my $self = SVN::Delta::Editor->new;
	bless $self, $class;
	if (exists $git_svn->{last_commit}) {
		$self->{c} = $git_svn->{last_commit};
		$self->{empty_symlinks} =
		                  _mark_empty_symlinks($git_svn, $switch_path);
	}

	# some options are read globally, but can be overridden locally
	# per [svn-remote "..."] section.  Command-line options will *NOT*
	# override options set in an [svn-remote "..."] section
	$repo_id = $git_svn->{repo_id};
	my $k = "svn-remote.$repo_id.ignore-paths";
	my $v = eval { command_oneline('config', '--get', $k) };
	$self->{ignore_regex} = $v;

	$k = "svn-remote.$repo_id.include-paths";
	$v = eval { command_oneline('config', '--get', $k) };
	$self->{include_regex} = $v;

	$k = "svn-remote.$repo_id.preserve-empty-dirs";
	$v = eval { command_oneline('config', '--get', '--bool', $k) };
	if ($v && $v eq 'true') {
		$_preserve_empty_dirs = 1;
		$k = "svn-remote.$repo_id.placeholder-filename";
		$v = eval { command_oneline('config', '--get', $k) };
		$_placeholder_filename = $v;
	}

	# Load the list of placeholder files added during previous invocations.
	$k = "svn-remote.$repo_id.added-placeholder";
	$v = eval { command_oneline('config', '--get-all', $k) };
	if ($_preserve_empty_dirs && $v) {
		# command() prints errors to stderr, so we only call it if
		# command_oneline() succeeded.
		my @v = command('config', '--get-all', $k);
		$added_placeholder{ dirname($_) } = $_ foreach @v;
	}

	$self->{empty} = {};
	$self->{dir_prop} = {};
	$self->{file_prop} = {};
	$self->{absent_dir} = {};
	$self->{absent_file} = {};
	require Git::IndexInfo;
	$self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
	$self->{pathnameencoding} = Git::config('svn.pathnameencoding');
	return $self;
}

# this uses the Ra object, so it must be called before do_{switch,update},
# not inside them (when the Git::SVN::Fetcher object is passed) to
# do_{switch,update}
#
# Returns a hash ref whose keys are the paths of empty regular-file blobs
# in the last fetched commit that carry the svn:special property.
# Returns an empty hash ref when svn.brokenSymlinkWorkaround is unset or
# false, or when there is no previous revision/commit.
sub _mark_empty_symlinks {
	my ($git_svn, $switch_path) = @_;
	my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
	# covers both "not configured" (undef) and "configured false"
	return {} unless $bool;

	my %ret;
	my ($rev, $cmt) = $git_svn->last_rev_commit;
	return {} unless ($rev && $cmt);

	# allow the warning to be printed for each revision we fetch to
	# ensure the user sees it.  The user can also disable the workaround
	# on the repository even while git svn is running and the next
	# revision fetched will skip this expensive function.
	my $printed_warning;
	chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
	my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
	my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
	$pfx .= '/' if length($pfx);
	while (defined(my $entry = get_record($ls, "\0"))) {
		# only empty, non-executable regular files are candidates;
		# stripping the prefix leaves just the path
		$entry =~ s/\A100644 blob $empty_blob\t//o or next;
		unless ($printed_warning) {
			print STDERR "Scanning for empty symlinks, ",
			             "this may take a while if you have ",
				     "many empty files\n",
				     "You may disable this with `",
				     "git config svn.brokenSymlinkWorkaround ",
				     "false'.\n",
				     "This may be done in a different ",
				     "terminal without restarting ",
				     "git svn\n";
			$printed_warning = 1;
		}
		my $path = $entry;
		my (undef, $props) =
		               $git_svn->ra->get_file($pfx.$path, $rev, undef);
		if ($props->{'svn:special'}) {
			$ret{$path} = 1;
		}
	}
	command_close_pipe($ls, $ctx);
	return \%ret;
}

# returns true if a given path is inside a ".git" directory
sub in_dot_git {
	$_[0] =~ m{(?:^|/)\.git(?:/|$)};
}

# return value: 0 -- don't ignore, 1 -- ignore
# This will also check whether the path is explicitly included
sub is_path_ignored {
	my ($self, $path) = @_;
	return 1 if in_dot_git($path);
	return 1 if defined($self->{ignore_regex}) &&
	            $path =~ m!$self->{ignore_regex}!;
	return 0 if defined($self->{include_regex}) &&
	            $path =~ m!$self->{include_regex}!;
	return 0 if defined($_include_regex) &&
	            $path =~ m!$_include_regex!;
	# an include list exists but did not match: everything else is ignored
	return 1 if defined($self->{include_regex});
	return 1 if defined($_include_regex);
	return 0 unless defined($_ignore_regex);
	return 1 if $path =~ m!$_ignore_regex!o;
	return 0;
}

# Remember a pattern that removes $path (plus a following "/", if any)
# from the front of a path; git_path() applies it.  Does nothing when
# $path is empty.
sub set_path_strip {
	my ($self, $path) = @_;
	$self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
}

# Editor callback: the directory baton for the root has an empty path.
sub open_root {
	return { path => '' };
}

# Editor callback: returns a directory baton carrying only the path.
sub open_directory {
	my ($self, $path, $pb, $rev) = @_;
	return { path => $path };
}

# Translate an SVN path into the path used in the git index: re-encode
# from UTF-8 to the configured svn.pathnameencoding (if any), then strip
# the prefix registered with set_path_strip() (if any).
# Dies when a strip pattern is set but does not match.
sub git_path {
	my ($self, $path) = @_;
	if (my $enc = $self->{pathnameencoding}) {
		require Encode;
		Encode::from_to($path, 'UTF-8', $enc);
	}
	if ($self->{path_strip}) {
		$path =~ s!$self->{path_strip}!! or
		  die "Failed to strip path '$path' ($self->{path_strip})\n";
	}
	return $path;
}

# Editor callback: remove $path from the index.  When the path is a tree
# in commit {c}, every file below it is removed individually.
# Always returns undef.
sub delete_entry {
	my ($self, $path, $rev, $pb) = @_;
	return undef if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	return undef if ($gpath eq '');

	# remove entire directories.
	my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
	                 =~ /\A040000 tree ($::oid)\t\Q$gpath\E\0/);
	if ($tree) {
		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
		                                     -r --name-only -z/,
				                     $tree);
		while (defined(my $name = get_record($ls, "\0"))) {
			my $rmpath = "$gpath/$name";
			$self->{gii}->remove($rmpath);
			print "\tD\t$rmpath\n" unless $::_q;
		}
		print "\tD\t$gpath/\n" unless $::_q;
		command_close_pipe($ls, $ctx);
	} else {
		$self->{gii}->remove($gpath);
		print "\tD\t$gpath\n" unless $::_q;
	}
	# Don't add to @deleted_gpath if we're deleting a placeholder file.
	push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
	$self->{empty}->{$path} = 0;
	return undef;
}

# Editor callback: returns a file baton for an existing file.  For a
# path that is not ignored, the mode and blob are looked up in commit
# {c}; dies if the path is not a blob there.  Ignored paths get a baton
# with undefined mode and blob.
sub open_file {
	my ($self, $path, $pb, $rev) = @_;
	my ($mode, $blob);

	unless ($self->is_path_ignored($path)) {
		my $gpath = $self->git_path($path);
		($mode, $blob) =
		    (command('ls-tree', '-z', $self->{c}, "./$gpath")
		         =~ /\A(\d{6}) blob ($::oid)\t\Q$gpath\E\0/);
		unless (defined $mode && defined $blob) {
			die "$path was not found in commit $self->{c} (r$rev)\n";
		}
		# an empty blob flagged by _mark_empty_symlinks() is
		# treated as a symlink
		if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
			$mode = '120000';
		}
	}

	return { path => $path, mode_a => $mode, mode_b => $mode,
	         blob => $blob, pool => SVN::Pool->new, action => 'M' };
}

# Editor callback: returns a file baton for a newly added file.  For a
# path that is not ignored, the parent directory stops being "empty" and
# any placeholder file previously created in it is removed.
sub add_file {
	my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
	my $mode;

	if (!$self->is_path_ignored($path)) {
		my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
		delete $self->{empty}->{$dir};
		$mode = '100644';

		if ($added_placeholder{$dir}) {
			# Remove our placeholder file, if we created one.
			delete_entry($self, $added_placeholder{$dir})
				unless $path eq $added_placeholder{$dir};
			delete $added_placeholder{$dir};
		}
	}

	return { path => $path, mode_a => $mode, mode_b => $mode,
	         pool => SVN::Pool->new, action => 'A' };
}

# Editor callback: returns a directory baton for a newly added
# directory.  When the directory maps to the root of the git tree,
# every file of commit {c} is removed from the index first.
sub add_directory {
	my ($self, $path, $cp_path, $cp_rev) = @_;
	return { path => $path } if $self->is_path_ignored($path);

	my $gpath = $self->git_path($path);
	if ($gpath eq '') {
		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
		                                     -r --name-only -z/,
				                     $self->{c});
		while (defined(my $name = get_record($ls, "\0"))) {
			$self->{gii}->remove($name);
			print "\tD\t$name\n" unless $::_q;
			# NOTE(review): this records $gpath (always '' here),
			# not the removed file name -- confirm that is
			# intended before changing it.
			push @deleted_gpath, $gpath;
		}
		command_close_pipe($ls, $ctx);
		$self->{empty}->{$path} = 0;
	}
	my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
	delete $self->{empty}->{$dir};
	$self->{empty}->{$path} = 1;

	if ($added_placeholder{$dir}) {
		# Remove our placeholder file, if we created one.
		delete_entry($self, $added_placeholder{$dir});
		delete $added_placeholder{$dir};
	}

	return { path => $path };
}

# Editor callback: record a directory property change in {dir_prop}.
sub change_dir_prop {
	my ($self, $db, $prop, $value) = @_;
	return undef if $self->is_path_ignored($db->{path});
	$self->{dir_prop}->{$db->{path}} ||= {};
	$self->{dir_prop}->{$db->{path}}->{$prop} = $value;
	return undef;
}

# Editor callback: record $path in the {absent_dir} list of its parent.
sub absent_directory {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);
	$self->{absent_dir}->{$pb->{path}} ||= [];
	push @{$self->{absent_dir}->{$pb->{path}}}, $path;
	return undef;
}

# Editor callback: record $path in the {absent_file} list of its parent.
sub absent_file {
	my ($self, $path, $pb) = @_;
	return undef if $self->is_path_ignored($path);
	$self->{absent_file}->{$pb->{path}} ||= [];
	push @{$self->{absent_file}->{$pb->{path}}}, $path;
	return undef;
}

# Editor callback: svn:executable and svn:special are translated into
# the git file mode kept in the baton (mode_b); every other property is
# recorded in {file_prop}.
sub change_file_prop {
	my ($self, $fb, $prop, $value) = @_;
	return undef if $self->is_path_ignored($fb->{path});
	if ($prop eq 'svn:executable') {
		# the executable bit never overrides a symlink mode
		if ($fb->{mode_b} != 120000) {
			$fb->{mode_b} = defined $value ? 100755 : 100644;
		}
	} elsif ($prop eq 'svn:special') {
		$fb->{mode_b} = defined $value ? 120000 : 100644;
	} else {
		$self->{file_prop}->{$fb->{path}} ||= {};
		$self->{file_prop}->{$fb->{path}}->{$prop} = $value;
	}
	return undef;
}

# Editor callback: prepare the temp files for a text delta and return
# the handler produced by SVN::TxDelta::apply() wrapped in an array ref.
# When the baton has a base blob it is written to the base temp file
# (prefixed with "link " for symlinks) and, if $exp is defined, verified
# against that checksum; dies on a mismatch that a retry cannot fix.
sub apply_textdelta {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});
	my $suffix = 0;
	++$suffix while $::_repository->temp_is_locked("svn_delta_${$}_$suffix");
	my $fh = $::_repository->temp_acquire("svn_delta_${$}_$suffix");
	# $fh gets auto-closed() by SVN::TxDelta::apply(),
	# (but $base does not,) so dup() it for reading in close_file
	open my $dup, '<&', $fh or croak $!;
	my $base = $::_repository->temp_acquire("git_blob_${$}_$suffix");
	# close_file may call temp_acquire on 'svn_hash', but because of the
	# call chain, if the temp_acquire call from close_file ends up being the
	# call that first creates the 'svn_hash' temp file, then the FileHandle
	# that's created as a result will end up in an SVN::Pool that we clear
	# in SVN::Ra::gs_fetch_loop_common.  Avoid that by making sure the
	# 'svn_hash' FileHandle is already created before close_file is called.
	my $tmp_fh = $::_repository->temp_acquire('svn_hash');
	$::_repository->temp_release($tmp_fh, 1);

	if ($fb->{blob}) {
		my ($base_is_link, $size);

		if ($fb->{mode_a} eq '120000' &&
		    ! $self->{empty_symlinks}->{$fb->{path}}) {
			print $base 'link ' or die "print $!\n";
			$base_is_link = 1;
		}

		# At most two passes: the second one only happens when the
		# "link "-prefixed base failed the checksum, and rewrites
		# the base without the prefix.
		while (1) {
			$size = $::_repository->cat_blob($fb->{blob}, $base);
			die "Failed to read object $fb->{blob}" if ($size < 0);

			last unless defined $exp;
			seek $base, 0, 0 or croak $!;
			my $got = ::md5sum($base);
			last if $got eq $exp;

			my $err = "Checksum mismatch: ".
			       "$fb->{path} $fb->{blob}\n" .
			       "expected: $exp\n" .
			       " got: $got\n";
			die $err unless $base_is_link;

			warn $err,
			     "Retrying... (possibly ",
			     "a bad symlink from SVN)\n";
			$::_repository->temp_reset($base);
			$base_is_link = 0;
		}
	}
	seek $base, 0, 0 or croak $!;
	$fb->{fh} = $fh;
	$fb->{base} = $base;
	return [ SVN::TxDelta::apply($base, $dup, undef,
	                             $fb->{path}, $fb->{pool}) ];
}

# Editor callback: finish a file.  If a delta was applied, its result is
# verified against $exp (when defined), the "link " prefix is stripped
# from symlink contents, and the data is hashed into the object store;
# otherwise the baton's existing blob is reused.  The index is then
# updated with the final mode, hash and path.
sub close_file {
	my ($self, $fb, $exp) = @_;
	return undef if $self->is_path_ignored($fb->{path});

	my $hash;
	my $path = $self->git_path($fb->{path});
	if (my $fh = $fb->{fh}) {
		if (defined $exp) {
			seek($fh, 0, 0) or croak $!;
			my $got = ::md5sum($fh);
			if ($got ne $exp) {
				die "Checksum mismatch: $path\n",
				    "expected: $exp\n got: $got\n";
			}
		}
		if ($fb->{mode_b} == 120000) {
			sysseek($fh, 0, 0) or croak $!;
			my $rd = sysread($fh, my $buf, 5);

			if (!defined $rd) {
				croak "sysread: $!\n";
			} elsif ($rd == 0) {
				warn "$path has mode 120000",
				     " but it points to nothing\n",
				     "converting to an empty file with mode",
				     " 100644\n";
				$fb->{mode_b} = '100644';
			} elsif ($buf ne 'link ') {
				warn "$path has mode 120000",
				     " but is not a link\n";
			} else {
				# copy everything after the "link " prefix
				# into the 'svn_hash' temp file
				my $tmp_fh = $::_repository->temp_acquire(
					'svn_hash');
				my $res;
				while ($res = sysread($fh, my $str, 1024)) {
					my $out = syswrite($tmp_fh, $str, $res);
					defined($out) && $out == $res
						or croak("write ",
							Git::temp_path($tmp_fh),
							": $!\n");
				}
				defined $res or croak $!;

				# hash the stripped copy; release the original
				($fh, $tmp_fh) = ($tmp_fh, $fh);
				Git::temp_release($tmp_fh, 1);
			}
		}

		$hash = $::_repository->hash_and_insert_object(
				Git::temp_path($fh));
		$hash =~ /^$::oid$/ or die "not an object ID: $hash\n";

		Git::temp_release($fb->{base}, 1);
		Git::temp_release($fh, 1);
	} else {
		$hash = $fb->{blob} or die "no blob information\n";
	}
	$fb->{pool}->clear;
	$self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
	print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
	return undef;
}

# Editor callback: keep the index entry count in {nr}, drop the index
# writer and delegate to the parent class.
sub abort_edit {
	my $self = shift;
	$self->{nr} = $self->{gii}->{nr};
	delete $self->{gii};
	return $self->SUPER::abort_edit(@_);
}

# Editor callback: when empty directories are preserved, add placeholder
# files to them and persist the placeholder list; then mark the edit as
# committable, keep the index entry count in {nr}, drop the index writer
# and delegate to the parent class.
sub close_edit {
	my $self = shift;

	if ($_preserve_empty_dirs) {
		my @empty_dirs;

		# Any entry flagged as empty that also has an associated
		# dir_prop represents a newly created empty directory.
		foreach my $i (keys %{$self->{empty}}) {
			push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
		}

		# Search for directories that have become empty due to
		# subsequent file deletes.
		push @empty_dirs, $self->find_empty_directories();

		# Finally, add a placeholder file to each empty directory.
		$self->add_placeholder_file($_) foreach (@empty_dirs);

		$self->stash_placeholder_list();
	}

	$self->{git_commit_ok} = 1;
	$self->{nr} = $self->{gii}->{nr};
	delete $self->{gii};
	return $self->SUPER::close_edit(@_);
}

# Returns the list of parent directories of deleted paths for which
# "git ls-tree" on commit {c} lists no name other than the ones deleted
# during this edit.
sub find_empty_directories {
	my ($self) = @_;
	my @empty_dirs;
	my %dirs = map { dirname($_) => 1 } @deleted_gpath;

	foreach my $dir (sort keys %dirs) {
		next if $dir eq ".";

		# If there have been any additions to this directory, there is
		# no reason to check if it is empty.
		# NOTE(review): the test below never looks at $dir, so its
		# outcome is the same for every directory -- confirm whether
		# a comparison against $dir was intended.
		my $skip_added = 0;
		foreach my $t (qw/dir_prop file_prop/) {
			foreach my $path (keys %{ $self->{$t} }) {
				if (exists $self->{$t}->{dirname($path)}) {
					$skip_added = 1;
					last;
				}
			}
			last if $skip_added;
		}
		next if $skip_added;

		# Use `git ls-tree` to get the filenames of this directory
		# that existed prior to this particular commit.
		my $ls = command('ls-tree', '-z', '--name-only',
				 $self->{c}, "$dir/");
		my %files = map { $_ => 1 } split(/\0/, $ls);

		# Remove the filenames that were deleted during this commit.
		delete $files{$_} foreach (@deleted_gpath);

		# Report the directory if there are no filenames left.
		push @empty_dirs, $dir unless (scalar %files);
	}
	return @empty_dirs;
}

# Add the placeholder file (named by $_placeholder_filename) to $dir in
# the index and remember it in %added_placeholder.
sub add_placeholder_file {
	my ($self, $dir) = @_;
	my $path = "$dir/$_placeholder_filename";
	my $gpath = $self->git_path($path);

	my $fh = $::_repository->temp_acquire($gpath);
	my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
	Git::temp_release($fh, 1);
	$self->{gii}->update('100644', $hash, $gpath) or croak $!;

	# The directory should no longer be considered empty.
	delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};

	# Keep track of any placeholder files we create.
	$added_placeholder{$dir} = $path;
}

# Replace the svn-remote.<repo_id>.added-placeholder config entries with
# the current values of %added_placeholder.
sub stash_placeholder_list {
	my ($self) = @_;
	my $k = "svn-remote.$repo_id.added-placeholder";
	my $v = eval { command_oneline('config', '--get-all', $k) };
	# --unset-all is only attempted when at least one value exists
	command_noisy('config', '--unset-all', $k) if $v;
	foreach (values %added_placeholder) {
		command_noisy('config', '--add', $k, $_);
	}
}

1;
__END__

=head1 NAME

Git::SVN::Fetcher - tree delta consumer for "git svn fetch"

=head1 SYNOPSIS

    use SVN::Core;
    use SVN::Ra;
    use Git::SVN;
    use Git::SVN::Fetcher;
    use Git;

    my $gs = Git::SVN->find_by_url($url);
    my $ra = SVN::Ra->new(url => $url);
    my $editor = Git::SVN::Fetcher->new($gs);
    my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
                                  1, $editor);
    $reporter->set_path('', $old_rev, 0);
    $reporter->finish_report;
    my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });

    foreach my $path (keys %{$editor->{dir_prop}}) {
        my $props = $editor->{dir_prop}{$path};
        foreach my $prop (keys %$props) {
            print "property $prop at $path changed to $props->{$prop}\n";
        }
    }
    foreach my $path (keys %{$editor->{empty}}) {
        my $action = $editor->{empty}{$path} ? 'added' : 'removed';
        print "empty directory $path $action\n";
    }
    foreach my $path (keys %{$editor->{file_prop}}) { ... }
    foreach my $parent (keys %{$editor->{absent_dir}}) {
        my @children = @{$editor->{absent_dir}{$parent}};
        print "cannot fetch directory $parent/$_: not authorized?\n"
            foreach @children;
    }
    foreach my $parent (keys %{$editor->{absent_file}}) { ... }

=head1 DESCRIPTION

This is a subclass of C<SVN::Delta::Editor>, which means it implements
callbacks to act as a consumer of Subversion tree deltas.  This
particular implementation of those callbacks is meant to store
information about the resulting content which B<git svn fetch> could
use to populate new commits and new entries for F<unhandled.log>.
More specifically:

=over

=item * Additions, removals, and modifications of files are propagated
to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
B<git update-index>.

=item * Changes in Subversion path properties are recorded in the
C<dir_prop> and C<file_prop> fields (which are hashes).

=item * Addition and removal of empty directories are indicated by
entries with value 1 and 0 respectively in the C<empty> hash.

=item * Paths that are present but cannot be conveyed (presumably due
to permissions) are recorded in the C<absent_file> and
C<absent_dir> hashes.  For each key, the corresponding value is
a list of paths under that directory that were present but
could not be conveyed.

=back

The interface is unstable.  Do not use this module unless you are
developing git-svn.

=head1 DEPENDENCIES

L<SVN::Delta> from the Subversion perl bindings,
the core L<Carp> and L<File::Basename> modules,
and git's L<Git> helper module.

C<Git::SVN::Fetcher> has not been tested using callers other than
B<git-svn> itself.

=head1 SEE ALSO

L<SVN::Delta>,
L<Git::SVN::Editor>.

=head1 INCOMPATIBILITIES

None reported.

=head1 BUGS

None.