1package Git::SVN;
2use strict;
3use warnings $ENV{GIT_PERL_FATAL_WARNINGS} ? qw(FATAL all) : ();
4use Fcntl qw/:DEFAULT :seek/;
5use constant rev_map_fmt => 'NH*';
6use vars qw/$_no_metadata
7            $_repack $_repack_flags $_use_svm_props $_head
8            $_use_svnsync_props $no_reuse_existing
9	    $_use_log_author $_add_author_from $_localtime/;
10use Carp qw/croak/;
11use File::Path qw/mkpath/;
12use IPC::Open3;
13use Memoize;  # core since 5.8.0, Jul 2002
14use POSIX qw(:signal_h);
15use Time::Local;
16
17use Git qw(
18    command
19    command_oneline
20    command_noisy
21    command_output_pipe
22    command_close_pipe
23    get_tz_offset
24);
25use Git::SVN::Utils qw(
26	fatal
27	can_compress
28	join_paths
29	canonicalize_path
30	canonicalize_url
31	add_path_to_url
32);
33
34my $memo_backend;
35our $_follow_parent  = 1;
36our $_minimize_url   = 'unset';
37our $default_repo_id = 'svn';
38our $default_ref_id  = $ENV{GIT_SVN_ID} || 'git-svn';
39
40my ($_gc_nr, $_gc_period);
41
42# properties that we do not log:
43my %SKIP_PROP;
44BEGIN {
45	%SKIP_PROP = map { $_ => 1 } qw/svn:wc:ra_dav:version-url
46	                                svn:special svn:executable
47	                                svn:entry:committed-rev
48	                                svn:entry:last-author
49	                                svn:entry:uuid
50	                                svn:entry:committed-date/;
51
52	# some options are read globally, but can be overridden locally
53	# per [svn-remote "..."] section.  Command-line options will *NOT*
54	# override options set in an [svn-remote "..."] section
55	no strict 'refs';
56	for my $option (qw/follow_parent no_metadata use_svm_props
57			   use_svnsync_props/) {
58		my $key = $option;
59		$key =~ tr/_//d;
60		my $prop = "-$option";
61		*$option = sub {
62			my ($self) = @_;
63			return $self->{$prop} if exists $self->{$prop};
64			my $k = "svn-remote.$self->{repo_id}.$key";
65			eval { command_oneline(qw/config --get/, $k) };
66			if ($@) {
67				$self->{$prop} = ${"Git::SVN::_$option"};
68			} else {
69				my $v = command_oneline(qw/config --bool/,$k);
70				$self->{$prop} = $v eq 'false' ? 0 : 1;
71			}
72			return $self->{$prop};
73		}
74	}
75}
76
77
78my (%LOCKFILES, %INDEX_FILES);
79END {
80	unlink keys %LOCKFILES if %LOCKFILES;
81	unlink keys %INDEX_FILES if %INDEX_FILES;
82}
83
84sub resolve_local_globs {
85	my ($url, $fetch, $glob_spec) = @_;
86	return unless defined $glob_spec;
87	my $ref = $glob_spec->{ref};
88	my $path = $glob_spec->{path};
89	foreach (command(qw#for-each-ref --format=%(refname) refs/#)) {
90		next unless m#^$ref->{regex}$#;
91		my $p = $1;
92		my $pathname = desanitize_refname($path->full_path($p));
93		my $refname = desanitize_refname($ref->full_path($p));
94		if (my $existing = $fetch->{$pathname}) {
95			if ($existing ne $refname) {
96				die "Refspec conflict:\n",
97				    "existing: $existing\n",
98				    " globbed: $refname\n";
99			}
100			my $u = (::cmt_metadata("$refname"))[0];
101			if (!defined($u)) {
102				warn
103"W: $refname: no associated commit metadata from SVN, skipping\n";
104				next;
105			}
106			$u =~ s!^\Q$url\E(/|$)!! or die
107			  "$refname: '$url' not found in '$u'\n";
108			if ($pathname ne $u) {
109				warn "W: Refspec glob conflict ",
110				     "(ref: $refname):\n",
111				     "expected path: $pathname\n",
112				     "    real path: $u\n",
113				     "Continuing ahead with $u\n";
114				next;
115			}
116		} else {
117			$fetch->{$pathname} = $refname;
118		}
119	}
120}
121
122sub parse_revision_argument {
123	my ($base, $head) = @_;
124	if (!defined $::_revision || $::_revision eq 'BASE:HEAD') {
125		return ($base, $head);
126	}
127	return ($1, $2) if ($::_revision =~ /^(\d+):(\d+)$/);
128	return ($::_revision, $::_revision) if ($::_revision =~ /^\d+$/);
129	return ($head, $head) if ($::_revision eq 'HEAD');
130	return ($base, $1) if ($::_revision =~ /^BASE:(\d+)$/);
131	return ($1, $head) if ($::_revision =~ /^(\d+):HEAD$/);
132	die "revision argument: $::_revision not understood by git-svn\n";
133}
134
135sub fetch_all {
136	my ($repo_id, $remotes) = @_;
137	if (ref $repo_id) {
138		my $gs = $repo_id;
139		$repo_id = undef;
140		$repo_id = $gs->{repo_id};
141	}
142	$remotes ||= read_all_remotes();
143	my $remote = $remotes->{$repo_id} or
144	             die "[svn-remote \"$repo_id\"] unknown\n";
145	my $fetch = $remote->{fetch};
146	my $url = $remote->{url} or die "svn-remote.$repo_id.url not defined\n";
147	my (@gs, @globs);
148	my $ra = Git::SVN::Ra->new($url);
149	my $uuid = $ra->get_uuid;
150	my $head = $ra->get_latest_revnum;
151
152	# ignore errors, $head revision may not even exist anymore
153	eval { $ra->get_log("", $head, 0, 1, 0, 1, sub { $head = $_[1] }) };
154	warn "W: $@\n" if $@;
155
156	my $base = defined $fetch ? $head : 0;
157
158	# read the max revs for wildcard expansion (branches/*, tags/*)
159	foreach my $t (qw/branches tags/) {
160		defined $remote->{$t} or next;
161		push @globs, @{$remote->{$t}};
162
163		my $max_rev = eval { tmp_config(qw/--int --get/,
164		                         "svn-remote.$repo_id.${t}-maxRev") };
165		if (defined $max_rev && ($max_rev < $base)) {
166			$base = $max_rev;
167		} elsif (!defined $max_rev) {
168			$base = 0;
169		}
170	}
171
172	if ($fetch) {
173		foreach my $p (sort keys %$fetch) {
174			my $gs = Git::SVN->new($fetch->{$p}, $repo_id, $p);
175			my $lr = $gs->rev_map_max;
176			if (defined $lr) {
177				$base = $lr if ($lr < $base);
178			}
179			push @gs, $gs;
180		}
181	}
182
183	($base, $head) = parse_revision_argument($base, $head);
184	$ra->gs_fetch_loop_common($base, $head, \@gs, \@globs);
185}
186
187sub read_all_remotes {
188	my $r = {};
189	my $use_svm_props = eval { command_oneline(qw/config --bool
190	    svn.useSvmProps/) };
191	$use_svm_props = $use_svm_props eq 'true' if $use_svm_props;
192	my $svn_refspec = qr{\s*(.*?)\s*:\s*(.+?)\s*};
193	foreach (grep { s/^svn-remote\.// } command(qw/config -l/)) {
194		if (m!^(.+)\.fetch=$svn_refspec$!) {
195			my ($remote, $local_ref, $remote_ref) = ($1, $2, $3);
196			die("svn-remote.$remote: remote ref '$remote_ref' "
197			    . "must start with 'refs/'\n")
198				unless $remote_ref =~ m{^refs/};
199			$local_ref = uri_decode($local_ref);
200			$r->{$remote}->{fetch}->{$local_ref} = $remote_ref;
201			$r->{$remote}->{svm} = {} if $use_svm_props;
202		} elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) {
203			$r->{$1}->{svm} = {};
204		} elsif (m!^(.+)\.url=\s*(.*)\s*$!) {
205			$r->{$1}->{url} = canonicalize_url($2);
206		} elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) {
207			$r->{$1}->{pushurl} = canonicalize_url($2);
208		} elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) {
209			$r->{$1}->{ignore_refs_regex} = $2;
210		} elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) {
211			my ($remote, $t, $local_ref, $remote_ref) =
212			                                     ($1, $2, $3, $4);
213			die("svn-remote.$remote: remote ref '$remote_ref' ($t) "
214			    . "must start with 'refs/'\n")
215				unless $remote_ref =~ m{^refs/};
216			$local_ref = uri_decode($local_ref);
217
218			require Git::SVN::GlobSpec;
219			my $rs = {
220			    t => $t,
221			    remote => $remote,
222			    path => Git::SVN::GlobSpec->new($local_ref, 1),
223			    ref => Git::SVN::GlobSpec->new($remote_ref, 0) };
224			if (length($rs->{ref}->{right}) != 0) {
225				die "The '*' glob character must be the last ",
226				    "character of '$remote_ref'\n";
227			}
228			push @{ $r->{$remote}->{$t} }, $rs;
229		}
230	}
231
232	map {
233		if (defined $r->{$_}->{svm}) {
234			my $svm;
235			eval {
236				my $section = "svn-remote.$_";
237				$svm = {
238					source => tmp_config('--get',
239					    "$section.svm-source"),
240					replace => tmp_config('--get',
241					    "$section.svm-replace"),
242				}
243			};
244			$r->{$_}->{svm} = $svm;
245		}
246	} keys %$r;
247
248	foreach my $remote (keys %$r) {
249		foreach ( grep { defined $_ }
250			  map { $r->{$remote}->{$_} } qw(branches tags) ) {
251			foreach my $rs ( @$_ ) {
252				$rs->{ignore_refs_regex} =
253				    $r->{$remote}->{ignore_refs_regex};
254			}
255		}
256	}
257
258	$r;
259}
260
261sub init_vars {
262	$_gc_nr = $_gc_period = 1000;
263	if (defined $_repack || defined $_repack_flags) {
264	       warn "Repack options are obsolete; they have no effect.\n";
265	}
266}
267
268sub verify_remotes_sanity {
269	return unless -d $ENV{GIT_DIR};
270	my %seen;
271	foreach (command(qw/config -l/)) {
272		if (m!^svn-remote\.(?:.+)\.fetch=.*:refs/remotes/(\S+)\s*$!) {
273			if ($seen{$1}) {
274				die "Remote ref refs/remote/$1 is tracked by",
275				    "\n  \"$_\"\nand\n  \"$seen{$1}\"\n",
276				    "Please resolve this ambiguity in ",
277				    "your git configuration file before ",
278				    "continuing\n";
279			}
280			$seen{$1} = $_;
281		}
282	}
283}
284
285sub find_existing_remote {
286	my ($url, $remotes) = @_;
287	return undef if $no_reuse_existing;
288	my $existing;
289	foreach my $repo_id (keys %$remotes) {
290		my $u = $remotes->{$repo_id}->{url} or next;
291		next if $u ne $url;
292		$existing = $repo_id;
293		last;
294	}
295	$existing;
296}
297
298sub init_remote_config {
299	my ($self, $url, $no_write) = @_;
300	$url = canonicalize_url($url);
301	my $r = read_all_remotes();
302	my $existing = find_existing_remote($url, $r);
303	if ($existing) {
304		unless ($no_write) {
305			print STDERR "Using existing ",
306				     "[svn-remote \"$existing\"]\n";
307		}
308		$self->{repo_id} = $existing;
309	} elsif ($_minimize_url) {
310		my $min_url = Git::SVN::Ra->new($url)->minimize_url;
311		$existing = find_existing_remote($min_url, $r);
312		if ($existing) {
313			unless ($no_write) {
314				print STDERR "Using existing ",
315					     "[svn-remote \"$existing\"]\n";
316			}
317			$self->{repo_id} = $existing;
318		}
319		if ($min_url ne $url) {
320			unless ($no_write) {
321				print STDERR "Using higher level of URL: ",
322					     "$url => $min_url\n";
323			}
324			my $old_path = $self->path;
325			$url =~ s!^\Q$min_url\E(/|$)!!;
326			$url = join_paths($url, $old_path);
327			$self->path($url);
328			$url = $min_url;
329		}
330	}
331	my $orig_url;
332	if (!$existing) {
333		# verify that we aren't overwriting anything:
334		$orig_url = eval {
335			command_oneline('config', '--get',
336					"svn-remote.$self->{repo_id}.url")
337		};
338		if ($orig_url && ($orig_url ne $url)) {
339			die "svn-remote.$self->{repo_id}.url already set: ",
340			    "$orig_url\nwanted to set to: $url\n";
341		}
342	}
343	my ($xrepo_id, $xpath) = find_ref($self->refname);
344	if (!$no_write && defined $xpath) {
345		die "svn-remote.$xrepo_id.fetch already set to track ",
346		    "$xpath:", $self->refname, "\n";
347	}
348	unless ($no_write) {
349		command_noisy('config',
350			      "svn-remote.$self->{repo_id}.url", $url);
351		my $path = $self->path;
352		$path =~ s{^/}{};
353		$path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
354		$self->path($path);
355		command_noisy('config', '--add',
356			      "svn-remote.$self->{repo_id}.fetch",
357			      $self->path.":".$self->refname);
358	}
359	$self->url($url);
360}
361
362sub find_by_url { # repos_root and, path are optional
363	my ($class, $full_url, $repos_root, $path) = @_;
364
365	$full_url = canonicalize_url($full_url);
366
367	return undef unless defined $full_url;
368	remove_username($full_url);
369	remove_username($repos_root) if defined $repos_root;
370	my $remotes = read_all_remotes();
371	if (defined $full_url && defined $repos_root && !defined $path) {
372		$path = $full_url;
373		$path =~ s#^\Q$repos_root\E(?:/|$)##;
374	}
375	foreach my $repo_id (keys %$remotes) {
376		my $u = $remotes->{$repo_id}->{url} or next;
377		remove_username($u);
378		next if defined $repos_root && $repos_root ne $u;
379
380		my $fetch = $remotes->{$repo_id}->{fetch} || {};
381		foreach my $t (qw/branches tags/) {
382			foreach my $globspec (@{$remotes->{$repo_id}->{$t}}) {
383				resolve_local_globs($u, $fetch, $globspec);
384			}
385		}
386		my $p = $path;
387		my $rwr = rewrite_root({repo_id => $repo_id});
388		my $svm = $remotes->{$repo_id}->{svm}
389			if defined $remotes->{$repo_id}->{svm};
390		unless (defined $p) {
391			$p = $full_url;
392			my $z = $u;
393			my $prefix = '';
394			if ($rwr) {
395				$z = $rwr;
396				remove_username($z);
397			} elsif (defined $svm) {
398				$z = $svm->{source};
399				$prefix = $svm->{replace};
400				$prefix =~ s#^\Q$u\E(?:/|$)##;
401				$prefix =~ s#/$##;
402			}
403			$p =~ s#^\Q$z\E(?:/|$)#$prefix# or next;
404		}
405
406		# remote fetch paths are not URI escaped.  Decode ours
407		# so they match
408		$p = uri_decode($p);
409
410		foreach my $f (keys %$fetch) {
411			next if $f ne $p;
412			return Git::SVN->new($fetch->{$f}, $repo_id, $f);
413		}
414	}
415	undef;
416}
417
418sub init {
419	my ($class, $url, $path, $repo_id, $ref_id, $no_write) = @_;
420	my $self = _new($class, $repo_id, $ref_id, $path);
421	if (defined $url) {
422		$self->init_remote_config($url, $no_write);
423	}
424	$self;
425}
426
427sub find_ref {
428	my ($ref_id) = @_;
429	foreach (command(qw/config -l/)) {
430		next unless m!^svn-remote\.(.+)\.fetch=
431		              \s*(.*?)\s*:\s*(.+?)\s*$!x;
432		my ($repo_id, $path, $ref) = ($1, $2, $3);
433		if ($ref eq $ref_id) {
434			$path = '' if ($path =~ m#^\./?#);
435			return ($repo_id, $path);
436		}
437	}
438	(undef, undef, undef);
439}
440
441sub new {
442	my ($class, $ref_id, $repo_id, $path) = @_;
443	if (defined $ref_id && !defined $repo_id && !defined $path) {
444		($repo_id, $path) = find_ref($ref_id);
445		if (!defined $repo_id) {
446			die "Could not find a \"svn-remote.*.fetch\" key ",
447			    "in the repository configuration matching: ",
448			    "$ref_id\n";
449		}
450	}
451	my $self = _new($class, $repo_id, $ref_id, $path);
452	if (!defined $self->path || !length $self->path) {
453		my $fetch = command_oneline('config', '--get',
454		                            "svn-remote.$repo_id.fetch",
455		                            ":$ref_id\$") or
456		     die "Failed to read \"svn-remote.$repo_id.fetch\" ",
457		         "\":$ref_id\$\" in config\n";
458		my($path) = split(/\s*:\s*/, $fetch);
459		$self->path($path);
460	}
461	{
462		my $path = $self->path;
463		$path =~ s{\A/}{};
464		$path =~ s{/\z}{};
465		$self->path($path);
466	}
467	my $url = command_oneline('config', '--get',
468	                          "svn-remote.$repo_id.url") or
469                  die "Failed to read \"svn-remote.$repo_id.url\" in config\n";
470	$self->url($url);
471	$self->{pushurl} = eval { command_oneline('config', '--get',
472	                          "svn-remote.$repo_id.pushurl") };
473	$self->rebuild;
474	$self;
475}
476
477sub refname {
478	my ($refname) = $_[0]->{ref_id} ;
479
480	# It cannot end with a slash /, we'll throw up on this because
481	# SVN can't have directories with a slash in their name, either:
482	if ($refname =~ m{/$}) {
483		die "ref: '$refname' ends with a trailing slash; this is ",
484		    "not permitted by git or Subversion\n";
485	}
486
487	# It cannot have ASCII control character space, tilde ~, caret ^,
488	# colon :, question-mark ?, asterisk *, space, or open bracket [
489	# anywhere.
490	#
491	# Additionally, % must be escaped because it is used for escaping
492	# and we want our escaped refname to be reversible
493	$refname =~ s{([ \%~\^:\?\*\[\t\\])}{sprintf('%%%02X',ord($1))}eg;
494
495	# no slash-separated component can begin with a dot .
496	# /.* becomes /%2E*
497	$refname =~ s{/\.}{/%2E}g;
498
499	# It cannot have two consecutive dots .. anywhere
500	# .. becomes %2E%2E
501	$refname =~ s{\.\.}{%2E%2E}g;
502
503	# trailing dots and .lock are not allowed
504	# .$ becomes %2E and .lock becomes %2Elock
505	$refname =~ s{\.(?=$|lock$)}{%2E};
506
507	# the sequence @{ is used to access the reflog
508	# @{ becomes %40{
509	$refname =~ s{\@\{}{%40\{}g;
510
511	return $refname;
512}
513
514sub desanitize_refname {
515	my ($refname) = @_;
516	$refname =~ s{%(?:([0-9A-F]{2}))}{chr hex($1)}eg;
517	return $refname;
518}
519
520sub svm_uuid {
521	my ($self) = @_;
522	return $self->{svm}->{uuid} if $self->svm;
523	$self->ra;
524	unless ($self->{svm}) {
525		die "SVM UUID not cached, and reading remotely failed\n";
526	}
527	$self->{svm}->{uuid};
528}
529
530sub svm {
531	my ($self) = @_;
532	return $self->{svm} if $self->{svm};
533	my $svm;
534	# see if we have it in our config, first:
535	eval {
536		my $section = "svn-remote.$self->{repo_id}";
537		$svm = {
538		  source => tmp_config('--get', "$section.svm-source"),
539		  uuid => tmp_config('--get', "$section.svm-uuid"),
540		  replace => tmp_config('--get', "$section.svm-replace"),
541		}
542	};
543	if ($svm && $svm->{source} && $svm->{uuid} && $svm->{replace}) {
544		$self->{svm} = $svm;
545	}
546	$self->{svm};
547}
548
549sub _set_svm_vars {
550	my ($self, $ra) = @_;
551	return $ra if $self->svm;
552
553	my @err = ( "useSvmProps set, but failed to read SVM properties\n",
554		    "(svm:source, svm:uuid) ",
555		    "from the following URLs:\n" );
556	sub read_svm_props {
557		my ($self, $ra, $path, $r) = @_;
558		my $props = ($ra->get_dir($path, $r))[2];
559		my $src = $props->{'svm:source'};
560		my $uuid = $props->{'svm:uuid'};
561		return undef if (!$src || !$uuid);
562
563		chomp($src, $uuid);
564
565		$uuid =~ m{^[0-9a-f\-]{30,}$}i
566		    or die "doesn't look right - svm:uuid is '$uuid'\n";
567
568		# the '!' is used to mark the repos_root!/relative/path
569		$src =~ s{/?!/?}{/};
570		$src =~ s{/+$}{}; # no trailing slashes please
571		# username is of no interest
572		$src =~ s{(^[a-z\+]*://)[^/@]*@}{$1};
573
574		my $replace = add_path_to_url($ra->url, $path);
575
576		my $section = "svn-remote.$self->{repo_id}";
577		tmp_config("$section.svm-source", $src);
578		tmp_config("$section.svm-replace", $replace);
579		tmp_config("$section.svm-uuid", $uuid);
580		$self->{svm} = {
581			source => $src,
582			uuid => $uuid,
583			replace => $replace
584		};
585	}
586
587	my $r = $ra->get_latest_revnum;
588	my $path = $self->path;
589	my %tried;
590	while (length $path) {
591		my $try = add_path_to_url($self->url, $path);
592		unless ($tried{$try}) {
593			return $ra if $self->read_svm_props($ra, $path, $r);
594			$tried{$try} = 1;
595		}
596		$path =~ s#/?[^/]+$##;
597	}
598	die "Path: '$path' should be ''\n" if $path ne '';
599	return $ra if $self->read_svm_props($ra, $path, $r);
600	$tried{ add_path_to_url($self->url, $path) } = 1;
601
602	if ($ra->{repos_root} eq $self->url) {
603		die @err, (map { "  $_\n" } keys %tried), "\n";
604	}
605
606	# nope, make sure we're connected to the repository root:
607	my $ok;
608	my @tried_b;
609	$path = $ra->{svn_path};
610	$ra = Git::SVN::Ra->new($ra->{repos_root});
611	while (length $path) {
612		my $try = add_path_to_url($ra->url, $path);
613		unless ($tried{$try}) {
614			$ok = $self->read_svm_props($ra, $path, $r);
615			last if $ok;
616			$tried{$try} = 1;
617		}
618		$path =~ s#/?[^/]+$##;
619	}
620	die "Path: '$path' should be ''\n" if $path ne '';
621	$ok ||= $self->read_svm_props($ra, $path, $r);
622	$tried{ add_path_to_url($ra->url, $path) } = 1;
623	if (!$ok) {
624		die @err, (map { "  $_\n" } keys %tried), "\n";
625	}
626	Git::SVN::Ra->new($self->url);
627}
628
629sub svnsync {
630	my ($self) = @_;
631	return $self->{svnsync} if $self->{svnsync};
632
633	if ($self->no_metadata) {
634		die "Can't have both 'noMetadata' and ",
635		    "'useSvnsyncProps' options set!\n";
636	}
637	if ($self->rewrite_root) {
638		die "Can't have both 'useSvnsyncProps' and 'rewriteRoot' ",
639		    "options set!\n";
640	}
641	if ($self->rewrite_uuid) {
642		die "Can't have both 'useSvnsyncProps' and 'rewriteUUID' ",
643		    "options set!\n";
644	}
645
646	my $svnsync;
647	# see if we have it in our config, first:
648	eval {
649		my $section = "svn-remote.$self->{repo_id}";
650
651		my $url = tmp_config('--get', "$section.svnsync-url");
652		($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
653		   die "doesn't look right - svn:sync-from-url is '$url'\n";
654
655		my $uuid = tmp_config('--get', "$section.svnsync-uuid");
656		($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
657		   die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
658
659		$svnsync = { url => $url, uuid => $uuid }
660	};
661	if ($svnsync && $svnsync->{url} && $svnsync->{uuid}) {
662		return $self->{svnsync} = $svnsync;
663	}
664
665	my $err = "useSvnsyncProps set, but failed to read " .
666	          "svnsync property: svn:sync-from-";
667	my $rp = $self->ra->rev_proplist(0);
668
669	my $url = $rp->{'svn:sync-from-url'} or die $err . "url\n";
670	($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
671	           die "doesn't look right - svn:sync-from-url is '$url'\n";
672
673	my $uuid = $rp->{'svn:sync-from-uuid'} or die $err . "uuid\n";
674	($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
675	           die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
676
677	my $section = "svn-remote.$self->{repo_id}";
678	tmp_config('--add', "$section.svnsync-uuid", $uuid);
679	tmp_config('--add', "$section.svnsync-url", $url);
680	return $self->{svnsync} = { url => $url, uuid => $uuid };
681}
682
683# this allows us to memoize our SVN::Ra UUID locally and avoid a
684# remote lookup (useful for 'git svn log').
685sub ra_uuid {
686	my ($self) = @_;
687	unless ($self->{ra_uuid}) {
688		my $key = "svn-remote.$self->{repo_id}.uuid";
689		my $uuid = eval { tmp_config('--get', $key) };
690		if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) {
691			$self->{ra_uuid} = $uuid;
692		} else {
693			die "ra_uuid called without URL\n" unless $self->url;
694			$self->{ra_uuid} = $self->ra->get_uuid;
695			tmp_config('--add', $key, $self->{ra_uuid});
696		}
697	}
698	$self->{ra_uuid};
699}
700
701sub _set_repos_root {
702	my ($self, $repos_root) = @_;
703	my $k = "svn-remote.$self->{repo_id}.reposRoot";
704	$repos_root ||= $self->ra->{repos_root};
705	tmp_config($k, $repos_root);
706	$repos_root;
707}
708
709sub repos_root {
710	my ($self) = @_;
711	my $k = "svn-remote.$self->{repo_id}.reposRoot";
712	eval { tmp_config('--get', $k) } || $self->_set_repos_root;
713}
714
715sub ra {
716	my ($self) = shift;
717	my $ra = Git::SVN::Ra->new($self->url);
718	$self->_set_repos_root($ra->{repos_root});
719	if ($self->use_svm_props && !$self->{svm}) {
720		if ($self->no_metadata) {
721			die "Can't have both 'noMetadata' and ",
722			    "'useSvmProps' options set!\n";
723		} elsif ($self->use_svnsync_props) {
724			die "Can't have both 'useSvnsyncProps' and ",
725			    "'useSvmProps' options set!\n";
726		}
727		$ra = $self->_set_svm_vars($ra);
728		$self->{-want_revprops} = 1;
729	}
730	$ra;
731}
732
733# prop_walk(PATH, REV, SUB)
734# -------------------------
735# Recursively traverse PATH at revision REV and invoke SUB for each
736# directory that contains a SVN property.  SUB will be invoked as
737# follows:  &SUB(gs, path, props);  where `gs' is this instance of
738# Git::SVN, `path' the path to the directory where the properties
739# `props' were found.  The `path' will be relative to point of checkout,
740# that is, if url://repo/trunk is the current Git branch, and that
741# directory contains a sub-directory `d', SUB will be invoked with `/d/'
742# as `path' (note the trailing `/').
743sub prop_walk {
744	my ($self, $path, $rev, $sub) = @_;
745
746	$path =~ s#^/##;
747	my ($dirent, undef, $props) = $self->ra->get_dir($path, $rev);
748	$path =~ s#^/*#/#g;
749	my $p = $path;
750	# Strip the irrelevant part of the path.
751	$p =~ s#^/+\Q@{[$self->path]}\E(/|$)#/#;
752	# Ensure the path is terminated by a `/'.
753	$p =~ s#/*$#/#;
754
755	# The properties contain all the internal SVN stuff nobody
756	# (usually) cares about.
757	my $interesting_props = 0;
758	foreach (keys %{$props}) {
759		# If it doesn't start with `svn:', it must be a
760		# user-defined property.
761		++$interesting_props and next if $_ !~ /^svn:/;
762		# FIXME: Fragile, if SVN adds new public properties,
763		# this needs to be updated.
764		++$interesting_props if /^svn:(?:ignore|keywords|executable
765		                                 |eol-style|mime-type
766						 |externals|needs-lock)$/x;
767	}
768	&$sub($self, $p, $props) if $interesting_props;
769
770	foreach (sort keys %$dirent) {
771		next if $dirent->{$_}->{kind} != $SVN::Node::dir;
772		$self->prop_walk($self->path . $p . $_, $rev, $sub);
773	}
774}
775
776sub last_rev { ($_[0]->last_rev_commit)[0] }
777sub last_commit { ($_[0]->last_rev_commit)[1] }
778
779# returns the newest SVN revision number and newest commit SHA1
780sub last_rev_commit {
781	my ($self) = @_;
782	if (defined $self->{last_rev} && defined $self->{last_commit}) {
783		return ($self->{last_rev}, $self->{last_commit});
784	}
785	my $c = ::verify_ref($self->refname.'^0');
786	if ($c && !$self->use_svm_props && !$self->no_metadata) {
787		my $rev = (::cmt_metadata($c))[1];
788		if (defined $rev) {
789			($self->{last_rev}, $self->{last_commit}) = ($rev, $c);
790			return ($rev, $c);
791		}
792	}
793	my $map_path = $self->map_path;
794	unless (-e $map_path) {
795		($self->{last_rev}, $self->{last_commit}) = (undef, undef);
796		return (undef, undef);
797	}
798	my ($rev, $commit) = $self->rev_map_max(1);
799	($self->{last_rev}, $self->{last_commit}) = ($rev, $commit);
800	return ($rev, $commit);
801}
802
803sub get_fetch_range {
804	my ($self, $min, $max) = @_;
805	$max ||= $self->ra->get_latest_revnum;
806	$min ||= $self->rev_map_max;
807	(++$min, $max);
808}
809
810sub svn_dir {
811	command_oneline(qw(rev-parse --git-path svn));
812}
813
814sub tmp_config {
815	my (@args) = @_;
816	my $svn_dir = svn_dir();
817	my $old_def_config = "$svn_dir/config";
818	my $config = "$svn_dir/.metadata";
819	if (! -f $config && -f $old_def_config) {
820		rename $old_def_config, $config or
821		       die "Failed rename $old_def_config => $config: $!\n";
822	}
823	my $old_config = $ENV{GIT_CONFIG};
824	$ENV{GIT_CONFIG} = $config;
825	$@ = undef;
826	my @ret = eval {
827		unless (-f $config) {
828			mkfile($config);
829			open my $fh, '>', $config or
830			    die "Can't open $config: $!\n";
831			print $fh "; This file is used internally by ",
832			          "git-svn\n" or die
833				  "Couldn't write to $config: $!\n";
834			print $fh "; You should not have to edit it\n" or
835			      die "Couldn't write to $config: $!\n";
836			close $fh or die "Couldn't close $config: $!\n";
837		}
838		command('config', @args);
839	};
840	my $err = $@;
841	if (defined $old_config) {
842		$ENV{GIT_CONFIG} = $old_config;
843	} else {
844		delete $ENV{GIT_CONFIG};
845	}
846	die $err if $err;
847	wantarray ? @ret : $ret[0];
848}
849
850sub tmp_index_do {
851	my ($self, $sub) = @_;
852	my $old_index = $ENV{GIT_INDEX_FILE};
853	$ENV{GIT_INDEX_FILE} = $self->{index};
854	$@ = undef;
855	my @ret = eval {
856		my ($dir, $base) = ($self->{index} =~ m#^(.*?)/?([^/]+)$#);
857		mkpath([$dir]) unless -d $dir;
858		&$sub;
859	};
860	my $err = $@;
861	if (defined $old_index) {
862		$ENV{GIT_INDEX_FILE} = $old_index;
863	} else {
864		delete $ENV{GIT_INDEX_FILE};
865	}
866	die $err if $err;
867	wantarray ? @ret : $ret[0];
868}
869
870sub assert_index_clean {
871	my ($self, $treeish) = @_;
872
873	$self->tmp_index_do(sub {
874		command_noisy('read-tree', $treeish) unless -e $self->{index};
875		my $x = command_oneline('write-tree');
876		my ($y) = (command(qw/cat-file commit/, $treeish) =~
877		           /^tree ($::oid)/mo);
878		return if $y eq $x;
879
880		warn "Index mismatch: $y != $x\nrereading $treeish\n";
881		unlink $self->{index} or die "unlink $self->{index}: $!\n";
882		command_noisy('read-tree', $treeish);
883		$x = command_oneline('write-tree');
884		if ($y ne $x) {
885			fatal "trees ($treeish) $y != $x\n",
886			      "Something is seriously wrong...";
887		}
888	});
889}
890
891sub get_commit_parents {
892	my ($self, $log_entry) = @_;
893	my (%seen, @ret, @tmp);
894	# legacy support for 'set-tree'; this is only used by set_tree_cb:
895	if (my $ip = $self->{inject_parents}) {
896		if (my $commit = delete $ip->{$log_entry->{revision}}) {
897			push @tmp, $commit;
898		}
899	}
900	if (my $cur = ::verify_ref($self->refname.'^0')) {
901		push @tmp, $cur;
902	}
903	if (my $ipd = $self->{inject_parents_dcommit}) {
904		if (my $commit = delete $ipd->{$log_entry->{revision}}) {
905			push @tmp, @$commit;
906		}
907	}
908	push @tmp, $_ foreach (@{$log_entry->{parents}}, @tmp);
909	while (my $p = shift @tmp) {
910		next if $seen{$p};
911		$seen{$p} = 1;
912		push @ret, $p;
913	}
914	@ret;
915}
916
917sub rewrite_root {
918	my ($self) = @_;
919	return $self->{-rewrite_root} if exists $self->{-rewrite_root};
920	my $k = "svn-remote.$self->{repo_id}.rewriteRoot";
921	my $rwr = eval { command_oneline(qw/config --get/, $k) };
922	if ($rwr) {
923		$rwr =~ s#/+$##;
924		if ($rwr !~ m#^[a-z\+]+://#) {
925			die "$rwr is not a valid URL (key: $k)\n";
926		}
927	}
928	$self->{-rewrite_root} = $rwr;
929}
930
931sub rewrite_uuid {
932	my ($self) = @_;
933	return $self->{-rewrite_uuid} if exists $self->{-rewrite_uuid};
934	my $k = "svn-remote.$self->{repo_id}.rewriteUUID";
935	my $rwid = eval { command_oneline(qw/config --get/, $k) };
936	if ($rwid) {
937		$rwid =~ s#/+$##;
938		if ($rwid !~ m#^[a-f0-9]{8}-(?:[a-f0-9]{4}-){3}[a-f0-9]{12}$#) {
939			die "$rwid is not a valid UUID (key: $k)\n";
940		}
941	}
942	$self->{-rewrite_uuid} = $rwid;
943}
944
945sub metadata_url {
946	my ($self) = @_;
947	my $url = $self->rewrite_root || $self->url;
948	return canonicalize_url( add_path_to_url( $url, $self->path ) );
949}
950
951sub full_url {
952	my ($self) = @_;
953	return canonicalize_url( add_path_to_url( $self->url, $self->path ) );
954}
955
956sub full_pushurl {
957	my ($self) = @_;
958	if ($self->{pushurl}) {
959		return canonicalize_url( add_path_to_url( $self->{pushurl}, $self->path ) );
960	} else {
961		return $self->full_url;
962	}
963}
964
965sub set_commit_header_env {
966	my ($log_entry) = @_;
967	my %env;
968	foreach my $ned (qw/NAME EMAIL DATE/) {
969		foreach my $ac (qw/AUTHOR COMMITTER/) {
970			$env{"GIT_${ac}_${ned}"} = $ENV{"GIT_${ac}_${ned}"};
971		}
972	}
973
974	$ENV{GIT_AUTHOR_NAME} = $log_entry->{name};
975	$ENV{GIT_AUTHOR_EMAIL} = $log_entry->{email};
976	$ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_entry->{date};
977
978	$ENV{GIT_COMMITTER_NAME} = (defined $log_entry->{commit_name})
979						? $log_entry->{commit_name}
980						: $log_entry->{name};
981	$ENV{GIT_COMMITTER_EMAIL} = (defined $log_entry->{commit_email})
982						? $log_entry->{commit_email}
983						: $log_entry->{email};
984	\%env;
985}
986
987sub restore_commit_header_env {
988	my ($env) = @_;
989	foreach my $ned (qw/NAME EMAIL DATE/) {
990		foreach my $ac (qw/AUTHOR COMMITTER/) {
991			my $k = "GIT_${ac}_${ned}";
992			if (defined $env->{$k}) {
993				$ENV{$k} = $env->{$k};
994			} else {
995				delete $ENV{$k};
996			}
997		}
998	}
999}
1000
1001sub gc {
1002	command_noisy('gc', '--auto');
1003};
1004
1005sub do_git_commit {
1006	my ($self, $log_entry) = @_;
1007	my $lr = $self->last_rev;
1008	if (defined $lr && $lr >= $log_entry->{revision}) {
1009		die "Last fetched revision of ", $self->refname,
1010		    " was r$lr, but we are about to fetch: ",
1011		    "r$log_entry->{revision}!\n";
1012	}
1013	if (my $c = $self->rev_map_get($log_entry->{revision})) {
1014		croak "$log_entry->{revision} = $c already exists! ",
1015		      "Why are we refetching it?\n";
1016	}
1017	my $old_env = set_commit_header_env($log_entry);
1018	my $tree = $log_entry->{tree};
1019	if (!defined $tree) {
1020		$tree = $self->tmp_index_do(sub {
1021		                            command_oneline('write-tree') });
1022	}
1023	die "Tree is not a valid oid $tree\n" if $tree !~ /^$::oid$/o;
1024
1025	my @exec = ('git', 'commit-tree', $tree);
1026	foreach ($self->get_commit_parents($log_entry)) {
1027		push @exec, '-p', $_;
1028	}
1029	defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec))
1030	                                                           or croak $!;
1031	binmode $msg_fh;
1032
1033	# we always get UTF-8 from SVN, but we may want our commits in
1034	# a different encoding.
1035	if (my $enc = Git::config('i18n.commitencoding')) {
1036		require Encode;
1037		Encode::from_to($log_entry->{log}, 'UTF-8', $enc);
1038	}
1039	print $msg_fh $log_entry->{log} or croak $!;
1040	restore_commit_header_env($old_env);
1041	unless ($self->no_metadata) {
1042		print $msg_fh "\ngit-svn-id: $log_entry->{metadata}\n"
1043		              or croak $!;
1044	}
1045	$msg_fh->flush == 0 or croak $!;
1046	close $msg_fh or croak $!;
1047	chomp(my $commit = do { local $/; <$out_fh> });
1048	close $out_fh or croak $!;
1049	waitpid $pid, 0;
1050	croak $? if $?;
1051	if ($commit !~ /^$::oid$/o) {
1052		die "Failed to commit, invalid oid: $commit\n";
1053	}
1054
1055	$self->rev_map_set($log_entry->{revision}, $commit, 1);
1056
1057	$self->{last_rev} = $log_entry->{revision};
1058	$self->{last_commit} = $commit;
1059	print "r$log_entry->{revision}" unless $::_q > 1;
1060	if (defined $log_entry->{svm_revision}) {
1061		 print " (\@$log_entry->{svm_revision})" unless $::_q > 1;
1062		 $self->rev_map_set($log_entry->{svm_revision}, $commit,
1063		                   0, $self->svm_uuid);
1064	}
1065	print " = $commit ($self->{ref_id})\n" unless $::_q > 1;
1066	if (--$_gc_nr == 0) {
1067		$_gc_nr = $_gc_period;
1068		gc();
1069	}
1070	return $commit;
1071}
1072
1073sub match_paths {
1074	my ($self, $paths, $r) = @_;
1075	return 1 if $self->path eq '';
1076	if (my $path = $paths->{"/".$self->path}) {
1077		return ($path->{action} eq 'D') ? 0 : 1;
1078	}
1079	$self->{path_regex} ||= qr{^/\Q@{[$self->path]}\E/};
1080	if (grep /$self->{path_regex}/, keys %$paths) {
1081		return 1;
1082	}
1083	my $c = '';
1084	foreach (split m#/#, $self->path) {
1085		$c .= "/$_";
1086		next unless ($paths->{$c} &&
1087		             ($paths->{$c}->{action} =~ /^[AR]$/));
1088		if ($self->ra->check_path($self->path, $r) ==
1089		    $SVN::Node::dir) {
1090			return 1;
1091		}
1092	}
1093	return 0;
1094}
1095
1096sub find_parent_branch {
1097	my ($self, $paths, $rev) = @_;
1098	return undef unless $self->follow_parent;
1099	unless (defined $paths) {
1100		my $err_handler = $SVN::Error::handler;
1101		$SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs;
1102		$self->ra->get_log([$self->path], $rev, $rev, 0, 1, 1,
1103				   sub { $paths = $_[0] });
1104		$SVN::Error::handler = $err_handler;
1105	}
1106	return undef unless defined $paths;
1107
1108	# look for a parent from another branch:
1109	my @b_path_components = split m#/#, $self->path;
1110	my @a_path_components;
1111	my $i;
1112	while (@b_path_components) {
1113		$i = $paths->{'/'.join('/', @b_path_components)};
1114		last if $i && defined $i->{copyfrom_path};
1115		unshift(@a_path_components, pop(@b_path_components));
1116	}
1117	return undef unless defined $i && defined $i->{copyfrom_path};
1118	my $branch_from = $i->{copyfrom_path};
1119	if (@a_path_components) {
1120		print STDERR "branch_from: $branch_from => ";
1121		$branch_from .= '/'.join('/', @a_path_components);
1122		print STDERR $branch_from, "\n";
1123	}
1124	my $r = $i->{copyfrom_rev};
1125	my $repos_root = $self->ra->{repos_root};
1126	my $url = $self->ra->url;
1127	my $new_url = canonicalize_url( add_path_to_url( $url, $branch_from ) );
1128	print STDERR  "Found possible branch point: ",
1129	              "$new_url => ", $self->full_url, ", $r\n"
1130	              unless $::_q > 1;
1131	$branch_from =~ s#^/##;
1132	my $gs = $self->other_gs($new_url, $url,
1133		                 $branch_from, $r, $self->{ref_id});
1134	my ($r0, $parent) = $gs->find_rev_before($r, 1);
1135	{
1136		my ($base, $head);
1137		if (!defined $r0 || !defined $parent) {
1138			($base, $head) = parse_revision_argument(0, $r);
1139		} else {
1140			if ($r0 < $r) {
1141				$gs->ra->get_log([$gs->path], $r0 + 1, $r, 1,
1142					0, 1, sub { $base = $_[1] - 1 });
1143			}
1144		}
1145		if (defined $base && $base <= $r) {
1146			$gs->fetch($base, $r);
1147		}
1148		($r0, $parent) = $gs->find_rev_before($r, 1);
1149	}
1150	if (defined $r0 && defined $parent) {
1151		print STDERR "Found branch parent: ($self->{ref_id}) $parent\n"
1152		             unless $::_q > 1;
1153		my $ed;
1154		if ($self->ra->can_do_switch) {
1155			$self->assert_index_clean($parent);
1156			print STDERR "Following parent with do_switch\n"
1157			             unless $::_q > 1;
1158			# do_switch works with svn/trunk >= r22312, but that
1159			# is not included with SVN 1.4.3 (the latest version
1160			# at the moment), so we can't rely on it
1161			$self->{last_rev} = $r0;
1162			$self->{last_commit} = $parent;
1163			$ed = Git::SVN::Fetcher->new($self, $gs->path);
1164			$gs->ra->gs_do_switch($r0, $rev, $gs,
1165					      $self->full_url, $ed)
1166			  or die "SVN connection failed somewhere...\n";
1167		} elsif ($self->ra->trees_match($new_url, $r0,
1168			                        $self->full_url, $rev)) {
1169			print STDERR "Trees match:\n",
1170			             "  $new_url\@$r0\n",
1171			             "  ${\$self->full_url}\@$rev\n",
1172			             "Following parent with no changes\n"
1173			             unless $::_q > 1;
1174			$self->tmp_index_do(sub {
1175			    command_noisy('read-tree', $parent);
1176			});
1177			$self->{last_commit} = $parent;
1178		} else {
1179			print STDERR "Following parent with do_update\n"
1180			             unless $::_q > 1;
1181			$ed = Git::SVN::Fetcher->new($self);
1182			$self->ra->gs_do_update($rev, $rev, $self, $ed)
1183			  or die "SVN connection failed somewhere...\n";
1184		}
1185		print STDERR "Successfully followed parent\n" unless $::_q > 1;
1186		return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
1187	}
1188	return undef;
1189}
1190
1191sub do_fetch {
1192	my ($self, $paths, $rev) = @_;
1193	my $ed;
1194	my ($last_rev, @parents);
1195	if (my $lc = $self->last_commit) {
1196		# we can have a branch that was deleted, then re-added
1197		# under the same name but copied from another path, in
1198		# which case we'll have multiple parents (we don't
1199		# want to break the original ref or lose copypath info):
1200		if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
1201			push @{$log_entry->{parents}}, $lc;
1202			return $log_entry;
1203		}
1204		$ed = Git::SVN::Fetcher->new($self);
1205		$last_rev = $self->{last_rev};
1206		$ed->{c} = $lc;
1207		@parents = ($lc);
1208	} else {
1209		$last_rev = $rev;
1210		if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
1211			return $log_entry;
1212		}
1213		$ed = Git::SVN::Fetcher->new($self);
1214	}
1215	unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
1216		die "SVN connection failed somewhere...\n";
1217	}
1218	$self->make_log_entry($rev, \@parents, $ed, $last_rev, $self->path);
1219}
1220
1221sub mkemptydirs {
1222	my ($self, $r) = @_;
1223
1224	# add/remove/collect a paths table
1225	#
1226	# Paths are split into a tree of nodes, stored as a hash of hashes.
1227	#
1228	# Each node contains a 'path' entry for the path (if any) associated
1229	# with that node and a 'children' entry for any nodes under that
1230	# location.
1231	#
1232	# Removing a path requires a hash lookup for each component then
1233	# dropping that node (and anything under it), which is substantially
1234	# faster than a grep slice into a single hash of paths for large
1235	# numbers of paths.
1236	#
1237	# For a large (200K) number of empty_dir directives this reduces
1238	# scanning time to 3 seconds vs 10 minutes for grep+delete on a single
1239	# hash of paths.
1240	sub add_path {
1241		my ($paths_table, $path) = @_;
1242		my $node_ref;
1243
1244		foreach my $x (split('/', $path)) {
1245			if (!exists($paths_table->{$x})) {
1246				$paths_table->{$x} = { children => {} };
1247			}
1248
1249			$node_ref = $paths_table->{$x};
1250			$paths_table = $paths_table->{$x}->{children};
1251		}
1252
1253		$node_ref->{path} = $path;
1254	}
1255
1256	sub remove_path {
1257		my ($paths_table, $path) = @_;
1258		my $nodes_ref;
1259		my $node_name;
1260
1261		foreach my $x (split('/', $path)) {
1262			if (!exists($paths_table->{$x})) {
1263				return;
1264			}
1265
1266			$nodes_ref = $paths_table;
1267			$node_name = $x;
1268
1269			$paths_table = $paths_table->{$x}->{children};
1270		}
1271
1272		delete($nodes_ref->{$node_name});
1273	}
1274
1275	sub collect_paths {
1276		my ($paths_table, $paths_ref) = @_;
1277
1278		foreach my $v (values %$paths_table) {
1279			my $p = $v->{path};
1280			my $c = $v->{children};
1281
1282			collect_paths($c, $paths_ref);
1283
1284			if (defined($p)) {
1285				push(@$paths_ref, $p);
1286			}
1287		}
1288	}
1289
1290	sub scan {
1291		my ($r, $paths_table, $line) = @_;
1292		if (defined $r && $line =~ /^r(\d+)$/) {
1293			return 0 if $1 > $r;
1294		} elsif ($line =~ /^  \+empty_dir: (.+)$/) {
1295			add_path($paths_table, $1);
1296		} elsif ($line =~ /^  \-empty_dir: (.+)$/) {
1297			remove_path($paths_table, $1);
1298		}
1299		1; # continue
1300	};
1301
1302	my @empty_dirs;
1303	my %paths_table;
1304
1305	my $gz_file = "$self->{dir}/unhandled.log.gz";
1306	if (-f $gz_file) {
1307		if (!can_compress()) {
1308			warn "Compress::Zlib could not be found; ",
1309			     "empty directories in $gz_file will not be read\n";
1310		} else {
1311			my $gz = Compress::Zlib::gzopen($gz_file, "rb") or
1312				die "Unable to open $gz_file: $!\n";
1313			my $line;
1314			while ($gz->gzreadline($line) > 0) {
1315				scan($r, \%paths_table, $line) or last;
1316			}
1317			$gz->gzclose;
1318		}
1319	}
1320
1321	if (open my $fh, '<', "$self->{dir}/unhandled.log") {
1322		binmode $fh or croak "binmode: $!";
1323		while (<$fh>) {
1324			scan($r, \%paths_table, $_) or last;
1325		}
1326		close $fh;
1327	}
1328
1329	collect_paths(\%paths_table, \@empty_dirs);
1330	my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
1331	foreach my $d (sort @empty_dirs) {
1332		$d = uri_decode($d);
1333		$d =~ s/$strip//;
1334		next unless length($d);
1335		next if -d $d;
1336		if (-e $d) {
1337			warn "$d exists but is not a directory\n";
1338		} else {
1339			print "creating empty directory: $d\n";
1340			mkpath([$d]);
1341		}
1342	}
1343}
1344
1345sub get_untracked {
1346	my ($self, $ed) = @_;
1347	my @out;
1348	my $h = $ed->{empty};
1349	foreach (sort keys %$h) {
1350		my $act = $h->{$_} ? '+empty_dir' : '-empty_dir';
1351		push @out, "  $act: " . uri_encode($_);
1352		warn "W: $act: $_\n";
1353	}
1354	foreach my $t (qw/dir_prop file_prop/) {
1355		$h = $ed->{$t} or next;
1356		foreach my $path (sort keys %$h) {
1357			my $ppath = $path eq '' ? '.' : $path;
1358			foreach my $prop (sort keys %{$h->{$path}}) {
1359				next if $SKIP_PROP{$prop};
1360				my $v = $h->{$path}->{$prop};
1361				my $t_ppath_prop = "$t: " .
1362				                    uri_encode($ppath) . ' ' .
1363				                    uri_encode($prop);
1364				if (defined $v) {
1365					push @out, "  +$t_ppath_prop " .
1366					           uri_encode($v);
1367				} else {
1368					push @out, "  -$t_ppath_prop";
1369				}
1370			}
1371		}
1372	}
1373	foreach my $t (qw/absent_file absent_directory/) {
1374		$h = $ed->{$t} or next;
1375		foreach my $parent (sort keys %$h) {
1376			foreach my $path (sort @{$h->{$parent}}) {
1377				push @out, "  $t: " .
1378				           uri_encode("$parent/$path");
1379				warn "W: $t: $parent/$path ",
1380				     "Insufficient permissions?\n";
1381			}
1382		}
1383	}
1384	\@out;
1385}
1386
1387# parse_svn_date(DATE)
1388# --------------------
1389# Given a date (in UTC) from Subversion, return a string in the format
1390# "<TZ Offset> <local date/time>" that Git will use.
1391#
1392# By default the parsed date will be in UTC; if $Git::SVN::_localtime
1393# is true we'll convert it to the local timezone instead.
1394sub parse_svn_date {
1395	my $date = shift || return '+0000 1970-01-01 00:00:00';
1396	my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
1397	                                    (\d\d?)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
1398	                                 croak "Unable to parse date: $date\n";
1399	my $parsed_date;    # Set next.
1400
1401	if ($Git::SVN::_localtime) {
1402		# Translate the Subversion datetime to an epoch time.
1403		# Begin by switching ourselves to $date's timezone, UTC.
1404		my $old_env_TZ = $ENV{TZ};
1405		$ENV{TZ} = 'UTC';
1406
1407		my $epoch_in_UTC =
1408		    Time::Local::timelocal($S, $M, $H, $d, $m - 1, $Y);
1409
1410		# Determine our local timezone (including DST) at the
1411		# time of $epoch_in_UTC.  $Git::SVN::Log::TZ stored the
1412		# value of TZ, if any, at the time we were run.
1413		if (defined $Git::SVN::Log::TZ) {
1414			$ENV{TZ} = $Git::SVN::Log::TZ;
1415		} else {
1416			delete $ENV{TZ};
1417		}
1418
1419		my $our_TZ = get_tz_offset($epoch_in_UTC);
1420
1421		# This converts $epoch_in_UTC into our local timezone.
1422		my ($sec, $min, $hour, $mday, $mon, $year,
1423		    $wday, $yday, $isdst) = localtime($epoch_in_UTC);
1424
1425		$parsed_date = sprintf('%s %04d-%02d-%02d %02d:%02d:%02d',
1426				       $our_TZ, $year + 1900, $mon + 1,
1427				       $mday, $hour, $min, $sec);
1428
1429		# Reset us to the timezone in effect when we entered
1430		# this routine.
1431		if (defined $old_env_TZ) {
1432			$ENV{TZ} = $old_env_TZ;
1433		} else {
1434			delete $ENV{TZ};
1435		}
1436	} else {
1437		$parsed_date = "+0000 $Y-$m-$d $H:$M:$S";
1438	}
1439
1440	return $parsed_date;
1441}
1442
1443sub other_gs {
1444	my ($self, $new_url, $url,
1445	    $branch_from, $r, $old_ref_id) = @_;
1446	my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from);
1447	unless ($gs) {
1448		my $ref_id = $old_ref_id;
1449		$ref_id =~ s/\@\d+-*$//;
1450		$ref_id .= "\@$r";
1451		# just grow a tail if we're not unique enough :x
1452		$ref_id .= '-' while find_ref($ref_id);
1453		my ($u, $p, $repo_id) = ($new_url, '', $ref_id);
1454		if ($u =~ s#^\Q$url\E(/|$)##) {
1455			$p = $u;
1456			$u = $url;
1457			$repo_id = $self->{repo_id};
1458		}
1459		while (1) {
1460			# It is possible to tag two different subdirectories at
1461			# the same revision.  If the url for an existing ref
1462			# does not match, we must either find a ref with a
1463			# matching url or create a new ref by growing a tail.
1464			$gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1);
1465			my (undef, $max_commit) = $gs->rev_map_max(1);
1466			last if (!$max_commit);
1467			my ($url) = ::cmt_metadata($max_commit);
1468			last if ($url eq $gs->metadata_url);
1469			$ref_id .= '-';
1470		}
1471		print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1;
1472	}
1473	$gs
1474}
1475
1476sub call_authors_prog {
1477	my ($orig_author) = @_;
1478	$orig_author = command_oneline('rev-parse', '--sq-quote', $orig_author);
1479	my $author = `$::_authors_prog $orig_author`;
1480	if ($? != 0) {
1481		die "$::_authors_prog failed with exit code $?\n"
1482	}
1483	if ($author =~ /^\s*(.+?)\s*<(.*)>\s*$/) {
1484		my ($name, $email) = ($1, $2);
1485		return [$name, $email];
1486	} else {
1487		die "Author: $orig_author: $::_authors_prog returned "
1488			. "invalid author format: $author\n";
1489	}
1490}
1491
1492sub check_author {
1493	my ($author) = @_;
1494	if (defined $author) {
1495		$author =~ s/^\s+//g;
1496		$author =~ s/\s+$//g;
1497	}
1498	if (!defined $author || length $author == 0) {
1499		$author = '(no author)';
1500	}
1501	if (!defined $::users{$author}) {
1502		if (defined $::_authors_prog) {
1503			$::users{$author} = call_authors_prog($author);
1504		} elsif (defined $::_authors) {
1505			die "Author: $author not defined in $::_authors file\n";
1506		}
1507	}
1508	$author;
1509}
1510
1511sub find_extra_svk_parents {
1512	my ($self, $tickets, $parents) = @_;
1513	# aha!  svk:merge property changed...
1514	my @tickets = split "\n", $tickets;
1515	my @known_parents;
1516	for my $ticket ( @tickets ) {
1517		my ($uuid, $path, $rev) = split /:/, $ticket;
1518		if ( $uuid eq $self->ra_uuid ) {
1519			my $repos_root = $self->url;
1520			my $branch_from = $path;
1521			$branch_from =~ s{^/}{};
1522			my $gs = $self->other_gs(add_path_to_url( $repos_root, $branch_from ),
1523			                         $repos_root,
1524			                         $branch_from,
1525			                         $rev,
1526			                         $self->{ref_id});
1527			if ( my $commit = $gs->rev_map_get($rev, $uuid) ) {
1528				# wahey!  we found it, but it might be
1529				# an old one (!)
1530				push @known_parents, [ $rev, $commit ];
1531			}
1532		}
1533	}
1534	# Ordering matters; highest-numbered commit merge tickets
1535	# first, as they may account for later merge ticket additions
1536	# or changes.
1537	@known_parents = map {$_->[1]} sort {$b->[0] <=> $a->[0]} @known_parents;
1538	for my $parent ( @known_parents ) {
1539		my @cmd = ('rev-list', $parent, map { "^$_" } @$parents );
1540		my ($msg_fh, $ctx) = command_output_pipe(@cmd);
1541		my $new;
1542		while ( <$msg_fh> ) {
1543			$new=1;last;
1544		}
1545		command_close_pipe($msg_fh, $ctx);
1546		if ( $new ) {
1547			print STDERR
1548			    "Found merge parent (svk:merge ticket): $parent\n";
1549			push @$parents, $parent;
1550		}
1551	}
1552}
1553
1554sub lookup_svn_merge {
1555	my $uuid = shift;
1556	my $url = shift;
1557	my $source = shift;
1558	my $revs = shift;
1559
1560	my $path = $source;
1561	$path =~ s{^/}{};
1562	my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
1563	if ( !$gs ) {
1564		warn "Couldn't find revmap for $url$source\n";
1565		return;
1566	}
1567	my @ranges = split ",", $revs;
1568	my ($tip, $tip_commit);
1569	my @merged_commit_ranges;
1570	# find the tip
1571	for my $range ( @ranges ) {
1572		if ($range =~ /[*]$/) {
1573			warn "W: Ignoring partial merge in svn:mergeinfo "
1574				."dirprop: $source:$range\n";
1575			next;
1576		}
1577		my ($bottom, $top) = split "-", $range;
1578		$top ||= $bottom;
1579		my $bottom_commit = $gs->find_rev_after( $bottom, 1, $top );
1580		my $top_commit = $gs->find_rev_before( $top, 1, $bottom );
1581
1582		unless ($top_commit and $bottom_commit) {
1583			warn "W: unknown path/rev in svn:mergeinfo "
1584				."dirprop: $source:$range\n";
1585			next;
1586		}
1587
1588		if (scalar(command('rev-parse', "$bottom_commit^@"))) {
1589			push @merged_commit_ranges,
1590			     "$bottom_commit^..$top_commit";
1591		} else {
1592			push @merged_commit_ranges, "$top_commit";
1593		}
1594
1595		if ( !defined $tip or $top > $tip ) {
1596			$tip = $top;
1597			$tip_commit = $top_commit;
1598		}
1599	}
1600	return ($tip_commit, @merged_commit_ranges);
1601}
1602
1603sub _rev_list {
1604	my ($msg_fh, $ctx) = command_output_pipe(
1605		"rev-list", @_,
1606	       );
1607	my @rv;
1608	while ( <$msg_fh> ) {
1609		chomp;
1610		push @rv, $_;
1611	}
1612	command_close_pipe($msg_fh, $ctx);
1613	@rv;
1614}
1615
1616sub check_cherry_pick2 {
1617	my $base = shift;
1618	my $tip = shift;
1619	my $parents = shift;
1620	my @ranges = @_;
1621	my %commits = map { $_ => 1 }
1622		_rev_list("--no-merges", $tip, "--not", $base, @$parents, "--");
1623	for my $range ( @ranges ) {
1624		delete @commits{_rev_list($range, "--")};
1625	}
1626	for my $commit (keys %commits) {
1627		if (has_no_changes($commit)) {
1628			delete $commits{$commit};
1629		}
1630	}
1631	my @k = (keys %commits);
1632	return (scalar @k, $k[0]);
1633}
1634
1635sub has_no_changes {
1636	my $commit = shift;
1637
1638	my @revs = split / /, command_oneline(
1639		qw(rev-list --parents -1), $commit);
1640
1641	# Commits with no parents, e.g. the start of a partial branch,
1642	# have changes by definition.
1643	return 1 if (@revs < 2);
1644
1645	# Commits with multiple parents, e.g a merge, have no changes
1646	# by definition.
1647	return 0 if (@revs > 2);
1648
1649	return (command_oneline("rev-parse", "$commit^{tree}") eq
1650		command_oneline("rev-parse", "$commit~1^{tree}"));
1651}
1652
1653sub tie_for_persistent_memoization {
1654	my $hash = shift;
1655	my $path = shift;
1656
1657	unless ($memo_backend) {
1658		if (eval { require Git::SVN::Memoize::YAML; 1}) {
1659			$memo_backend = 1;
1660		} else {
1661			require Memoize::Storable;
1662			$memo_backend = -1;
1663		}
1664	}
1665
1666	if ($memo_backend > 0) {
1667		tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml";
1668	} else {
1669		# first verify that any existing file can actually be loaded
1670		# (it may have been saved by an incompatible version)
1671		my $db = "$path.db";
1672		if (-e $db) {
1673			use Storable qw(retrieve);
1674
1675			if (!eval { retrieve($db); 1 }) {
1676				unlink $db or die "unlink $db failed: $!";
1677			}
1678		}
1679		tie %$hash => 'Memoize::Storable', $db, 'nstore';
1680	}
1681}
1682
1683# The GIT_DIR environment variable is not always set until after the command
1684# line arguments are processed, so we can't memoize in a BEGIN block.
1685{
1686	my $memoized = 0;
1687
1688	sub memoize_svn_mergeinfo_functions {
1689		return if $memoized;
1690		$memoized = 1;
1691
1692		my $cache_path = svn_dir() . '/.caches/';
1693		mkpath([$cache_path]) unless -d $cache_path;
1694
1695		my %lookup_svn_merge_cache;
1696		my %check_cherry_pick2_cache;
1697		my %has_no_changes_cache;
1698
1699		tie_for_persistent_memoization(\%lookup_svn_merge_cache,
1700		    "$cache_path/lookup_svn_merge");
1701		memoize 'lookup_svn_merge',
1702			SCALAR_CACHE => 'FAULT',
1703			LIST_CACHE => ['HASH' => \%lookup_svn_merge_cache],
1704		;
1705
1706		tie_for_persistent_memoization(\%check_cherry_pick2_cache,
1707		    "$cache_path/check_cherry_pick2");
1708		memoize 'check_cherry_pick2',
1709			SCALAR_CACHE => 'FAULT',
1710			LIST_CACHE => ['HASH' => \%check_cherry_pick2_cache],
1711		;
1712
1713		tie_for_persistent_memoization(\%has_no_changes_cache,
1714		    "$cache_path/has_no_changes");
1715		memoize 'has_no_changes',
1716			SCALAR_CACHE => ['HASH' => \%has_no_changes_cache],
1717			LIST_CACHE => 'FAULT',
1718		;
1719	}
1720
1721	sub unmemoize_svn_mergeinfo_functions {
1722		return if not $memoized;
1723		$memoized = 0;
1724
1725		Memoize::unmemoize 'lookup_svn_merge';
1726		Memoize::unmemoize 'check_cherry_pick2';
1727		Memoize::unmemoize 'has_no_changes';
1728	}
1729
1730	sub clear_memoized_mergeinfo_caches {
1731		die "Only call this method in non-memoized context" if ($memoized);
1732
1733		my $cache_path = svn_dir() . '/.caches/';
1734		return unless -d $cache_path;
1735
1736		for my $cache_file (("$cache_path/lookup_svn_merge",
1737				     "$cache_path/check_cherry_pick", # old
1738				     "$cache_path/check_cherry_pick2",
1739				     "$cache_path/has_no_changes")) {
1740			for my $suffix (qw(yaml db)) {
1741				my $file = "$cache_file.$suffix";
1742				next unless -e $file;
1743				unlink($file) or die "unlink($file) failed: $!\n";
1744			}
1745		}
1746	}
1747
1748
1749	Memoize::memoize 'Git::SVN::repos_root';
1750}
1751
1752END {
1753	# Force cache writeout explicitly instead of waiting for
1754	# global destruction to avoid segfault in Storable:
1755	# http://rt.cpan.org/Public/Bug/Display.html?id=36087
1756	unmemoize_svn_mergeinfo_functions();
1757}
1758
1759sub parents_exclude {
1760	my $parents = shift;
1761	my @commits = @_;
1762	return unless @commits;
1763
1764	my @excluded;
1765	my $excluded;
1766	do {
1767		my @cmd = ('rev-list', "-1", @commits, "--not", @$parents );
1768		$excluded = command_oneline(@cmd);
1769		if ( $excluded ) {
1770			my @new;
1771			my $found;
1772			for my $commit ( @commits ) {
1773				if ( $commit eq $excluded ) {
1774					push @excluded, $commit;
1775					$found++;
1776				}
1777				else {
1778					push @new, $commit;
1779				}
1780			}
1781			die "saw commit '$excluded' in rev-list output, "
1782				."but we didn't ask for that commit (wanted: @commits --not @$parents)"
1783					unless $found;
1784			@commits = @new;
1785		}
1786	}
1787		while ($excluded and @commits);
1788
1789	return @excluded;
1790}
1791
1792# Compute what's new in svn:mergeinfo.
1793sub mergeinfo_changes {
1794	my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
1795	my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
1796	my $old_minfo = {};
1797
1798	my $ra = $self->ra;
1799	# Give up if $old_path isn't in the repo.
1800	# This is probably a merge on a subtree.
1801	if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
1802		warn "W: ignoring svn:mergeinfo on $old_path, ",
1803			"directory didn't exist in r$old_rev\n";
1804		return {};
1805	}
1806	my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev);
1807	if (defined $props->{"svn:mergeinfo"}) {
1808		my %omi = map {split ":", $_ } split "\n",
1809			$props->{"svn:mergeinfo"};
1810		$old_minfo = \%omi;
1811	}
1812
1813	my %changes = ();
1814	foreach my $p (keys %minfo) {
1815		my $a = $old_minfo->{$p} || "";
1816		my $b = $minfo{$p};
1817		# Omit merged branches whose ranges lists are unchanged.
1818		next if $a eq $b;
1819		# Remove any common range list prefix.
1820		($a ^ $b) =~ /^[\0]*/;
1821		my $common_prefix = rindex $b, ",", $+[0] - 1;
1822		$changes{$p} = substr $b, $common_prefix + 1;
1823	}
1824	print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
1825		scalar(keys %minfo), " sources, ",
1826		scalar(keys %changes), " changed\n";
1827
1828	return \%changes;
1829}
1830
1831# note: this function should only be called if the various dirprops
1832# have actually changed
1833sub find_extra_svn_parents {
1834	my ($self, $mergeinfo, $parents) = @_;
1835	# aha!  svk:merge property changed...
1836
1837	memoize_svn_mergeinfo_functions();
1838
1839	# We first search for merged tips which are not in our
1840	# history.  Then, we figure out which git revisions are in
1841	# that tip, but not this revision.  If all of those revisions
1842	# are now marked as merge, we can add the tip as a parent.
1843	my @merges = sort keys %$mergeinfo;
1844	my @merge_tips;
1845	my $url = $self->url;
1846	my $uuid = $self->ra_uuid;
1847	my @all_ranges;
1848	for my $merge ( @merges ) {
1849		my ($tip_commit, @ranges) =
1850			lookup_svn_merge( $uuid, $url,
1851					  $merge, $mergeinfo->{$merge} );
1852		unless (!$tip_commit or
1853				grep { $_ eq $tip_commit } @$parents ) {
1854			push @merge_tips, $tip_commit;
1855			push @all_ranges, @ranges;
1856		} else {
1857			push @merge_tips, undef;
1858		}
1859	}
1860
1861	my %excluded = map { $_ => 1 }
1862		parents_exclude($parents, grep { defined } @merge_tips);
1863
1864	# check merge tips for new parents
1865	my @new_parents;
1866	for my $merge_tip ( @merge_tips ) {
1867		my $merge = shift @merges;
1868		next unless $merge_tip and $excluded{$merge_tip};
1869		my $spec = "$merge:$mergeinfo->{$merge}";
1870
1871		# check out 'new' tips
1872		my $merge_base;
1873		eval {
1874			$merge_base = command_oneline(
1875				"merge-base",
1876				@$parents, $merge_tip,
1877			);
1878		};
1879		if ($@) {
1880			die "An error occurred during merge-base"
1881				unless $@->isa("Git::Error::Command");
1882
1883			warn "W: Cannot find common ancestor between ".
1884			     "@$parents and $merge_tip. Ignoring merge info.\n";
1885			next;
1886		}
1887
1888		# double check that there are no missing non-merge commits
1889		my ($ninc, $ifirst) = check_cherry_pick2(
1890			$merge_base, $merge_tip,
1891			$parents,
1892			@all_ranges,
1893		       );
1894
1895		if ($ninc) {
1896			warn "W: svn cherry-pick ignored ($spec) - missing " .
1897				"$ninc commit(s) (eg $ifirst)\n";
1898		} else {
1899			warn "Found merge parent ($spec): ", $merge_tip, "\n";
1900			push @new_parents, $merge_tip;
1901		}
1902	}
1903
1904	# cater for merges which merge commits from multiple branches
1905	if ( @new_parents > 1 ) {
1906		for ( my $i = 0; $i <= $#new_parents; $i++ ) {
1907			for ( my $j = 0; $j <= $#new_parents; $j++ ) {
1908				next if $i == $j;
1909				next unless $new_parents[$i];
1910				next unless $new_parents[$j];
1911				my $revs = command_oneline(
1912					"rev-list", "-1",
1913					"$new_parents[$i]..$new_parents[$j]",
1914				       );
1915				if ( !$revs ) {
1916					undef($new_parents[$j]);
1917				}
1918			}
1919		}
1920	}
1921	push @$parents, grep { defined } @new_parents;
1922}
1923
1924sub make_log_entry {
1925	my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
1926	my $untracked = $self->get_untracked($ed);
1927
1928	my @parents = @$parents;
1929	my $props = $ed->{dir_prop}{$self->path};
1930	if ($self->follow_parent) {
1931		my $tickets = $props->{"svk:merge"};
1932		if ($tickets) {
1933			$self->find_extra_svk_parents($tickets, \@parents);
1934		}
1935
1936		my $mergeinfo_prop = $props->{"svn:mergeinfo"};
1937		if ($mergeinfo_prop) {
1938			my $mi_changes = $self->mergeinfo_changes(
1939						$parent_path,
1940						$parent_rev,
1941						$self->path,
1942						$rev,
1943						$mergeinfo_prop);
1944			$self->find_extra_svn_parents($mi_changes, \@parents);
1945		}
1946	}
1947
1948	open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!;
1949	print $un "r$rev\n" or croak $!;
1950	print $un $_, "\n" foreach @$untracked;
1951	my %log_entry = ( parents => \@parents, revision => $rev,
1952	                  log => '');
1953
1954	my $headrev;
1955	my $logged = delete $self->{logged_rev_props};
1956	if (!$logged || $self->{-want_revprops}) {
1957		my $rp = $self->ra->rev_proplist($rev);
1958		foreach (sort keys %$rp) {
1959			my $v = $rp->{$_};
1960			if (/^svn:(author|date|log)$/) {
1961				$log_entry{$1} = $v;
1962			} elsif ($_ eq 'svm:headrev') {
1963				$headrev = $v;
1964			} else {
1965				print $un "  rev_prop: ", uri_encode($_), ' ',
1966					  uri_encode($v), "\n";
1967			}
1968		}
1969	} else {
1970		map { $log_entry{$_} = $logged->{$_} } keys %$logged;
1971	}
1972	close $un or croak $!;
1973
1974	$log_entry{date} = parse_svn_date($log_entry{date});
1975	$log_entry{log} .= "\n";
1976	my $author = $log_entry{author} = check_author($log_entry{author});
1977	my ($name, $email) = defined $::users{$author} ? @{$::users{$author}}
1978						       : ($author, undef);
1979
1980	my ($commit_name, $commit_email) = ($name, $email);
1981	if ($_use_log_author) {
1982		my $name_field;
1983		if ($log_entry{log} =~ /From:\s+(.*\S)\s*\n/i) {
1984			$name_field = $1;
1985		} elsif ($log_entry{log} =~ /Signed-off-by:\s+(.*\S)\s*\n/i) {
1986			$name_field = $1;
1987		}
1988		if (!defined $name_field) {
1989			if (!defined $email) {
1990				$email = $name;
1991			}
1992		} elsif ($name_field =~ /(.*?)\s+<(.*)>/) {
1993			($name, $email) = ($1, $2);
1994		} elsif ($name_field =~ /(.*)@/) {
1995			($name, $email) = ($1, $name_field);
1996		} else {
1997			($name, $email) = ($name_field, $name_field);
1998		}
1999	}
2000	if (defined $headrev && $self->use_svm_props) {
2001		if ($self->rewrite_root) {
2002			die "Can't have both 'useSvmProps' and 'rewriteRoot' ",
2003			    "options set!\n";
2004		}
2005		if ($self->rewrite_uuid) {
2006			die "Can't have both 'useSvmProps' and 'rewriteUUID' ",
2007			    "options set!\n";
2008		}
2009		my ($uuid, $r) = $headrev =~ m{^([a-f\d\-]{30,}):(\d+)$}i;
2010		# we don't want "SVM: initializing mirror for junk" ...
2011		return undef if $r == 0;
2012		my $svm = $self->svm;
2013		if ($uuid ne $svm->{uuid}) {
2014			die "UUID mismatch on SVM path:\n",
2015			    "expected: $svm->{uuid}\n",
2016			    "     got: $uuid\n";
2017		}
2018		my $full_url = $self->full_url;
2019		$full_url =~ s#^\Q$svm->{replace}\E(/|$)#$svm->{source}$1# or
2020		             die "Failed to replace '$svm->{replace}' with ",
2021		                 "'$svm->{source}' in $full_url\n";
2022		# throw away username for storing in records
2023		remove_username($full_url);
2024		$log_entry{metadata} = "$full_url\@$r $uuid";
2025		$log_entry{svm_revision} = $r;
2026		$email = "$author\@$uuid" unless defined $email;
2027		$commit_email = "$author\@$uuid" unless defined $commit_email;
2028	} elsif ($self->use_svnsync_props) {
2029		my $full_url = canonicalize_url(
2030			add_path_to_url( $self->svnsync->{url}, $self->path )
2031		);
2032		remove_username($full_url);
2033		my $uuid = $self->svnsync->{uuid};
2034		$log_entry{metadata} = "$full_url\@$rev $uuid";
2035		$email = "$author\@$uuid" unless defined $email;
2036		$commit_email = "$author\@$uuid" unless defined $commit_email;
2037	} else {
2038		my $url = $self->metadata_url;
2039		remove_username($url);
2040		my $uuid = $self->rewrite_uuid || $self->ra->get_uuid;
2041		$log_entry{metadata} = "$url\@$rev " . $uuid;
2042		$email = "$author\@$uuid" unless defined $email;
2043		$commit_email = "$author\@$uuid" unless defined $commit_email;
2044	}
2045	$log_entry{name} = $name;
2046	$log_entry{email} = $email;
2047	$log_entry{commit_name} = $commit_name;
2048	$log_entry{commit_email} = $commit_email;
2049	\%log_entry;
2050}
2051
2052sub fetch {
2053	my ($self, $min_rev, $max_rev, @parents) = @_;
2054	my ($last_rev, $last_commit) = $self->last_rev_commit;
2055	my ($base, $head) = $self->get_fetch_range($min_rev, $max_rev);
2056	$self->ra->gs_fetch_loop_common($base, $head, [$self]);
2057}
2058
2059sub set_tree_cb {
2060	my ($self, $log_entry, $tree, $rev, $date, $author) = @_;
2061	$self->{inject_parents} = { $rev => $tree };
2062	$self->fetch(undef, undef);
2063}
2064
2065sub set_tree {
2066	my ($self, $tree) = (shift, shift);
2067	my $log_entry = ::get_commit_entry($tree);
2068	unless ($self->{last_rev}) {
2069		fatal("Must have an existing revision to commit");
2070	}
2071	my %ed_opts = ( r => $self->{last_rev},
2072	                log => $log_entry->{log},
2073	                ra => $self->ra,
2074	                tree_a => $self->{last_commit},
2075	                tree_b => $tree,
2076	                editor_cb => sub {
2077			       $self->set_tree_cb($log_entry, $tree, @_) },
2078	                svn_path => $self->path );
2079	if (!Git::SVN::Editor->new(\%ed_opts)->apply_diff) {
2080		print "No changes\nr$self->{last_rev} = $tree\n";
2081	}
2082}
2083
2084sub rebuild_from_rev_db {
2085	my ($self, $path) = @_;
2086	my $r = -1;
2087	open my $fh, '<', $path or croak "open: $!";
2088	binmode $fh or croak "binmode: $!";
2089	while (<$fh>) {
2090		length($_) == $::oid_length + 1 or croak "inconsistent size in ($_)";
2091		chomp($_);
2092		++$r;
2093		next if $_ eq ('0' x $::oid_length);
2094		$self->rev_map_set($r, $_);
2095		print "r$r = $_\n";
2096	}
2097	close $fh or croak "close: $!";
2098	unlink $path or croak "unlink: $!";
2099}
2100
2101#define a global associate map to record rebuild status
2102my %rebuild_status;
2103#define a global associate map to record rebuild verify status
2104my %rebuild_verify_status;
2105
2106sub rebuild {
2107	my ($self) = @_;
2108	my $map_path = $self->map_path;
2109	my $partial = (-e $map_path && ! -z $map_path);
2110	my $verify_key = $self->refname.'^0';
2111	if (!$rebuild_verify_status{$verify_key}) {
2112		my $verify_result = ::verify_ref($verify_key);
2113		if ($verify_result) {
2114			$rebuild_verify_status{$verify_key} = 1;
2115		}
2116	}
2117	if (!$rebuild_verify_status{$verify_key}) {
2118		return;
2119	}
2120	if (!$partial && ($self->use_svm_props || $self->no_metadata)) {
2121		my $rev_db = $self->rev_db_path;
2122		$self->rebuild_from_rev_db($rev_db);
2123		if ($self->use_svm_props) {
2124			my $svm_rev_db = $self->rev_db_path($self->svm_uuid);
2125			$self->rebuild_from_rev_db($svm_rev_db);
2126		}
2127		$self->unlink_rev_db_symlink;
2128		return;
2129	}
2130	print "Rebuilding $map_path ...\n" if (!$partial);
2131	my ($base_rev, $head) = ($partial ? $self->rev_map_max_norebuild(1) :
2132		(undef, undef));
2133	my $key_value = ($head ? "$head.." : "") . $self->refname;
2134	if (exists $rebuild_status{$key_value}) {
2135		print "Done rebuilding $map_path\n" if (!$partial || !$head);
2136		my $rev_db_path = $self->rev_db_path;
2137		if (-f $self->rev_db_path) {
2138			unlink $self->rev_db_path or croak "unlink: $!";
2139		}
2140		$self->unlink_rev_db_symlink;
2141		return;
2142	}
2143	my ($log, $ctx) =
2144		command_output_pipe(qw/rev-list --pretty=raw --reverse/,
2145				$key_value,
2146				'--');
2147	$rebuild_status{$key_value} = 1;
2148	my $metadata_url = $self->metadata_url;
2149	remove_username($metadata_url);
2150	my $svn_uuid = $self->rewrite_uuid || $self->ra_uuid;
2151	my $c;
2152	while (<$log>) {
2153		if ( m{^commit ($::oid)$} ) {
2154			$c = $1;
2155			next;
2156		}
2157		next unless s{^\s*(git-svn-id:)}{$1};
2158		my ($url, $rev, $uuid) = ::extract_metadata($_);
2159		remove_username($url);
2160
2161		# ignore merges (from set-tree)
2162		next if (!defined $rev || !$uuid);
2163
2164		# if we merged or otherwise started elsewhere, this is
2165		# how we break out of it
2166		if (($uuid ne $svn_uuid) ||
2167		    ($metadata_url && $url && ($url ne $metadata_url))) {
2168			next;
2169		}
2170		if ($partial && $head) {
2171			print "Partial-rebuilding $map_path ...\n";
2172			print "Currently at $base_rev = $head\n";
2173			$head = undef;
2174		}
2175
2176		$self->rev_map_set($rev, $c);
2177		print "r$rev = $c\n";
2178	}
2179	command_close_pipe($log, $ctx);
2180	print "Done rebuilding $map_path\n" if (!$partial || !$head);
2181	my $rev_db_path = $self->rev_db_path;
2182	if (-f $self->rev_db_path) {
2183		unlink $self->rev_db_path or croak "unlink: $!";
2184	}
2185	$self->unlink_rev_db_symlink;
2186}
2187
2188# rev_map:
2189# Tie::File seems to be prone to offset errors if revisions get sparse,
2190# it's not that fast, either.  Tie::File is also not in Perl 5.6.  So
2191# one of my favorite modules is out :<  Next up would be one of the DBM
2192# modules, but I'm not sure which is most portable...
2193#
2194# This is the replacement for the rev_db format, which was too big
2195# and inefficient for large repositories with a lot of sparse history
2196# (mainly tags)
2197#
2198# The format is this:
2199#   - 24 or 36 bytes for every record,
2200#     * 4 bytes for the integer representing an SVN revision number
2201#     * 20 or 32 bytes representing the oid of a git commit
2202#   - No empty padding records like the old format
2203#     (except the last record, which can be overwritten)
2204#   - new records are written append-only since SVN revision numbers
2205#     increase monotonically
2206#   - lookups on SVN revision number are done via a binary search
2207#   - Piping the file to xxd -c24 is a good way of dumping it for
2208#     viewing or editing (piped back through xxd -r), should the need
2209#     ever arise.
2210#   - The last record can be padding revision with an all-zero oid
2211#     This is used to optimize fetch performance when using multiple
2212#     "fetch" directives in .git/config
2213#
2214# These files are disposable unless noMetadata or useSvmProps is set
2215
2216sub _rev_map_set {
2217	my ($fh, $rev, $commit) = @_;
2218	my $record_size = ($::oid_length / 2) + 4;
2219
2220	binmode $fh or croak "binmode: $!";
2221	my $size = (stat($fh))[7];
2222	($size % $record_size) == 0 or croak "inconsistent size: $size";
2223
2224	my $wr_offset = 0;
2225	if ($size > 0) {
2226		sysseek($fh, -$record_size, SEEK_END) or croak "seek: $!";
2227		my $read = sysread($fh, my $buf, $record_size) or croak "read: $!";
2228		$read == $record_size or croak "read only $read bytes (!= $record_size)";
2229		my ($last_rev, $last_commit) = unpack(rev_map_fmt, $buf);
2230		if ($last_commit eq ('0' x $::oid_length)) {
2231			if ($size >= ($record_size * 2)) {
2232				sysseek($fh, -($record_size * 2), SEEK_END) or croak "seek: $!";
2233				$read = sysread($fh, $buf, $record_size) or
2234				    croak "read: $!";
2235				$read == $record_size or
2236				    croak "read only $read bytes (!= $record_size)";
2237				($last_rev, $last_commit) =
2238				    unpack(rev_map_fmt, $buf);
2239				if ($last_commit eq ('0' x $::oid_length)) {
2240					croak "inconsistent .rev_map\n";
2241				}
2242			}
2243			if ($last_rev >= $rev) {
2244				croak "last_rev is higher!: $last_rev >= $rev";
2245			}
2246			$wr_offset = -$record_size;
2247		}
2248	}
2249	sysseek($fh, $wr_offset, SEEK_END) or croak "seek: $!";
2250	syswrite($fh, pack(rev_map_fmt, $rev, $commit), $record_size) == $record_size or
2251	  croak "write: $!";
2252}
2253
2254sub _rev_map_reset {
2255	my ($fh, $rev, $commit) = @_;
2256	my $c = _rev_map_get($fh, $rev);
2257	$c eq $commit or die "_rev_map_reset(@_) commit $c does not match!\n";
2258	my $offset = sysseek($fh, 0, SEEK_CUR) or croak "seek: $!";
2259	truncate $fh, $offset or croak "truncate: $!";
2260}
2261
2262sub mkfile {
2263	my ($path) = @_;
2264	unless (-e $path) {
2265		my ($dir, $base) = ($path =~ m#^(.*?)/?([^/]+)$#);
2266		mkpath([$dir]) unless -d $dir;
2267		open my $fh, '>>', $path or die "Couldn't create $path: $!\n";
2268		close $fh or die "Couldn't close (create) $path: $!\n";
2269	}
2270}
2271
2272sub rev_map_set {
2273	my ($self, $rev, $commit, $update_ref, $uuid) = @_;
2274	defined $commit or die "missing arg3\n";
2275	$commit =~ /^$::oid$/ or die "arg3 must be a full hex object ID\n";
2276	my $db = $self->map_path($uuid);
2277	my $db_lock = "$db.lock";
2278	my $sigmask;
2279	$update_ref ||= 0;
2280	if ($update_ref) {
2281		$sigmask = POSIX::SigSet->new();
2282		my $signew = POSIX::SigSet->new(SIGINT, SIGHUP, SIGTERM,
2283			SIGALRM, SIGUSR1, SIGUSR2);
2284		sigprocmask(SIG_BLOCK, $signew, $sigmask) or
2285			croak "Can't block signals: $!";
2286	}
2287	mkfile($db);
2288
2289	$LOCKFILES{$db_lock} = 1;
2290	my $sync;
2291	# both of these options make our .rev_db file very, very important
2292	# and we can't afford to lose it because rebuild() won't work
2293	if ($self->use_svm_props || $self->no_metadata) {
2294		require File::Copy;
2295		$sync = 1;
2296		File::Copy::copy($db, $db_lock) or die "rev_map_set(@_): ",
2297					   "Failed to copy: ",
2298					   "$db => $db_lock ($!)\n";
2299	} else {
2300		rename $db, $db_lock or die "rev_map_set(@_): ",
2301					    "Failed to rename: ",
2302					    "$db => $db_lock ($!)\n";
2303	}
2304
2305	sysopen(my $fh, $db_lock, O_RDWR | O_CREAT)
2306	     or croak "Couldn't open $db_lock: $!\n";
2307	if ($update_ref eq 'reset') {
2308		clear_memoized_mergeinfo_caches();
2309		_rev_map_reset($fh, $rev, $commit);
2310	} else {
2311		_rev_map_set($fh, $rev, $commit);
2312	}
2313
2314	if ($sync) {
2315		$fh->flush or die "Couldn't flush $db_lock: $!\n";
2316		$fh->sync or die "Couldn't sync $db_lock: $!\n";
2317	}
2318	close $fh or croak $!;
2319	if ($update_ref) {
2320		$_head = $self;
2321		my $note = "";
2322		$note = " ($update_ref)" if ($update_ref !~ /^\d*$/);
2323		command_noisy('update-ref', '-m', "r$rev$note",
2324		              $self->refname, $commit);
2325	}
2326	rename $db_lock, $db or die "rev_map_set(@_): ", "Failed to rename: ",
2327	                            "$db_lock => $db ($!)\n";
2328	delete $LOCKFILES{$db_lock};
2329	if ($update_ref) {
2330		sigprocmask(SIG_SETMASK, $sigmask) or
2331			croak "Can't restore signal mask: $!";
2332	}
2333}
2334
2335# If want_commit, this will return an array of (rev, commit) where
2336# commit _must_ be a valid commit in the archive.
2337# Otherwise, it'll return the max revision (whether or not the
2338# commit is valid or just a 0x40 placeholder).
2339sub rev_map_max {
2340	my ($self, $want_commit) = @_;
2341	$self->rebuild;
2342	my ($r, $c) = $self->rev_map_max_norebuild($want_commit);
2343	$want_commit ? ($r, $c) : $r;
2344}
2345
2346sub rev_map_max_norebuild {
2347	my ($self, $want_commit) = @_;
2348	my $record_size = ($::oid_length / 2) + 4;
2349	my $map_path = $self->map_path;
2350	stat $map_path or return $want_commit ? (0, undef) : 0;
2351	sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
2352	binmode $fh or croak "binmode: $!";
2353	my $size = (stat($fh))[7];
2354	($size % $record_size) == 0 or croak "inconsistent size: $size";
2355
2356	if ($size == 0) {
2357		close $fh or croak "close: $!";
2358		return $want_commit ? (0, undef) : 0;
2359	}
2360
2361	sysseek($fh, -$record_size, SEEK_END) or croak "seek: $!";
2362	sysread($fh, my $buf, $record_size) == $record_size or croak "read: $!";
2363	my ($r, $c) = unpack(rev_map_fmt, $buf);
2364	if ($want_commit && $c eq ('0' x $::oid_length)) {
2365		if ($size < $record_size * 2) {
2366			return $want_commit ? (0, undef) : 0;
2367		}
2368		sysseek($fh, -($record_size * 2), SEEK_END) or croak "seek: $!";
2369		sysread($fh, $buf, $record_size) == $record_size or croak "read: $!";
2370		($r, $c) = unpack(rev_map_fmt, $buf);
2371		if ($c eq ('0' x $::oid_length)) {
2372			croak "Penultimate record is all-zeroes in $map_path";
2373		}
2374	}
2375	close $fh or croak "close: $!";
2376	$want_commit ? ($r, $c) : $r;
2377}
2378
2379sub rev_map_get {
2380	my ($self, $rev, $uuid) = @_;
2381	my $map_path = $self->map_path($uuid);
2382	return undef unless -e $map_path;
2383
2384	sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
2385	my $c = _rev_map_get($fh, $rev);
2386	close($fh) or croak "close: $!";
2387	$c
2388}
2389
2390sub _rev_map_get {
2391	my ($fh, $rev) = @_;
2392	my $record_size = ($::oid_length / 2) + 4;
2393
2394	binmode $fh or croak "binmode: $!";
2395	my $size = (stat($fh))[7];
2396	($size % $record_size) == 0 or croak "inconsistent size: $size";
2397
2398	if ($size == 0) {
2399		return undef;
2400	}
2401
2402	my ($l, $u) = (0, $size - $record_size);
2403	my ($r, $c, $buf);
2404
2405	while ($l <= $u) {
2406		my $i = int(($l/$record_size + $u/$record_size) / 2) * $record_size;
2407		sysseek($fh, $i, SEEK_SET) or croak "seek: $!";
2408		sysread($fh, my $buf, $record_size) == $record_size or croak "read: $!";
2409		my ($r, $c) = unpack(rev_map_fmt, $buf);
2410
2411		if ($r < $rev) {
2412			$l = $i + $record_size;
2413		} elsif ($r > $rev) {
2414			$u = $i - $record_size;
2415		} else { # $r == $rev
2416			return $c eq ('0' x $::oid_length) ? undef : $c;
2417		}
2418	}
2419	undef;
2420}
2421
2422# Finds the first svn revision that exists on (if $eq_ok is true) or
2423# before $rev for the current branch.  It will not search any lower
2424# than $min_rev.  Returns the git commit hash and svn revision number
2425# if found, else (undef, undef).
2426sub find_rev_before {
2427	my ($self, $rev, $eq_ok, $min_rev) = @_;
2428	--$rev unless $eq_ok;
2429	$min_rev ||= 1;
2430	my $max_rev = $self->rev_map_max;
2431	$rev = $max_rev if ($rev > $max_rev);
2432	while ($rev >= $min_rev) {
2433		if (my $c = $self->rev_map_get($rev)) {
2434			return ($rev, $c);
2435		}
2436		--$rev;
2437	}
2438	return (undef, undef);
2439}
2440
2441# Finds the first svn revision that exists on (if $eq_ok is true) or
2442# after $rev for the current branch.  It will not search any higher
2443# than $max_rev.  Returns the git commit hash and svn revision number
2444# if found, else (undef, undef).
2445sub find_rev_after {
2446	my ($self, $rev, $eq_ok, $max_rev) = @_;
2447	++$rev unless $eq_ok;
2448	$max_rev ||= $self->rev_map_max;
2449	while ($rev <= $max_rev) {
2450		if (my $c = $self->rev_map_get($rev)) {
2451			return ($rev, $c);
2452		}
2453		++$rev;
2454	}
2455	return (undef, undef);
2456}
2457
2458sub _new {
2459	my ($class, $repo_id, $ref_id, $path) = @_;
2460	unless (defined $repo_id && length $repo_id) {
2461		$repo_id = $default_repo_id;
2462	}
2463	unless (defined $ref_id && length $ref_id) {
2464		# Access the prefix option from the git-svn main program if it's loaded.
2465		my $prefix = defined &::opt_prefix ? ::opt_prefix() : "";
2466		$_[2] = $ref_id =
2467		             "refs/remotes/$prefix$default_ref_id";
2468	}
2469	$_[1] = $repo_id;
2470	my $svn_dir = svn_dir();
2471	my $dir = "$svn_dir/$ref_id";
2472
2473	# Older repos imported by us used $svn_dir/foo instead of
2474	# $svn_dir/refs/remotes/foo when tracking refs/remotes/foo
2475	if ($ref_id =~ m{^refs/remotes/(.+)}) {
2476		my $old_dir = "$svn_dir/$1";
2477		if (-d $old_dir && ! -d $dir) {
2478			$dir = $old_dir;
2479		}
2480	}
2481
2482	$_[3] = $path = '' unless (defined $path);
2483	mkpath([$dir]);
2484	my $obj = bless {
2485		ref_id => $ref_id, dir => $dir, index => "$dir/index",
2486	        config => "$svn_dir/config",
2487	        map_root => "$dir/.rev_map", repo_id => $repo_id }, $class;
2488
2489	# Ensure it gets canonicalized
2490	$obj->path($path);
2491
2492	return $obj;
2493}
2494
2495sub path {
2496	my $self = shift;
2497
2498	if (@_) {
2499		my $path = shift;
2500		$self->{_path} = canonicalize_path($path);
2501		return;
2502	}
2503
2504	return $self->{_path};
2505}
2506
2507sub url {
2508	my $self = shift;
2509
2510	if (@_) {
2511		my $url = shift;
2512		$self->{url} = canonicalize_url($url);
2513		return;
2514	}
2515
2516	return $self->{url};
2517}
2518
2519# for read-only access of old .rev_db formats
2520sub unlink_rev_db_symlink {
2521	my ($self) = @_;
2522	my $link = $self->rev_db_path;
2523	$link =~ s/\.[\w-]+$// or croak "missing UUID at the end of $link";
2524	if (-l $link) {
2525		unlink $link or croak "unlink: $link failed!";
2526	}
2527}
2528
2529sub rev_db_path {
2530	my ($self, $uuid) = @_;
2531	my $db_path = $self->map_path($uuid);
2532	$db_path =~ s{/\.rev_map\.}{/\.rev_db\.}
2533	    or croak "map_path: $db_path does not contain '/.rev_map.' !";
2534	$db_path;
2535}
2536
2537# the new replacement for .rev_db
2538sub map_path {
2539	my ($self, $uuid) = @_;
2540	$uuid ||= $self->ra_uuid;
2541	"$self->{map_root}.$uuid";
2542}
2543
2544sub uri_encode {
2545	my ($f) = @_;
2546	$f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#sprintf("%%%02X",ord($1))#eg;
2547	$f
2548}
2549
2550sub uri_decode {
2551	my ($f) = @_;
2552	$f =~ s#%([0-9a-fA-F]{2})#chr(hex($1))#eg;
2553	$f
2554}
2555
2556sub remove_username {
2557	$_[0] =~ s{^([^:]*://)[^@]+@}{$1};
2558}
2559
25601;
2561