1#!/usr/bin/perl 2# 3# This tool is copyright (c) 2005, Martin Langhoff. 4# It is released under the Gnu Public License, version 2. 5# 6# The basic idea is to walk the output of tla abrowse, 7# fetch the changesets and apply them. 8# 9 10=head1 Invocation 11 12 git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] 13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ] 14 15Imports a project from one or more Arch repositories. It will follow branches 16and repositories within the namespaces defined by the <archive/branch> 17parameters supplied. If it cannot find the remote branch a merge comes from 18it will just import it as a regular commit. If it can find it, it will mark it 19as a merge whenever possible. 20 21See man (1) git-archimport for more details. 22 23=head1 TODO 24 25 - create tag objects instead of ref tags 26 - audit shell-escaping of filenames 27 - hide our private tags somewhere smarter 28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines 29 - sort and apply patches by graphing ancestry relations instead of just 30 relying in dates supplied in the changeset itself. 31 tla ancestry-graph -m could be helpful here... 32 33=head1 Devel tricks 34 35Add print in front of the shell commands invoked via backticks. 36 37=head1 Devel Notes 38 39There are several places where Arch and git terminology are intermixed 40and potentially confused. 41 42The notion of a "branch" in git is approximately equivalent to 43a "archive/category--branch--version" in Arch. Also, it should be noted 44that the "--branch" portion of "archive/category--branch--version" is really 45optional in Arch although not many people (nor tools!) seem to know this. 46This means that "archive/category--version" is also a valid "branch" 47in git terms. 48 49We always refer to Arch names by their fully qualified variant (which 50means the "archive" name is prefixed. 51 52For people unfamiliar with Arch, an "archive" is the term for "repository", 53and can contain multiple, unrelated branches. 54 55=cut 56 57use 5.008; 58use strict; 59use warnings; 60use Getopt::Std; 61use File::Temp qw(tempdir); 62use File::Path qw(mkpath rmtree); 63use File::Basename qw(basename dirname); 64use Data::Dumper qw/ Dumper /; 65use IPC::Open2; 66 67$SIG{'PIPE'}="IGNORE"; 68$ENV{'TZ'}="UTC"; 69 70my $git_dir = $ENV{"GIT_DIR"} || ".git"; 71$ENV{"GIT_DIR"} = $git_dir; 72my $ptag_dir = "$git_dir/archimport/tags"; 73 74our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o); 75 76sub usage() { 77 print STDERR <<END; 78usage: git archimport # fetch/update GIT from Arch 79 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ] 80 repository/arch-branch [ repository/arch-branch] ... 81END 82 exit(1); 83} 84 85getopts("fThvat:D:") or usage(); 86usage if $opt_h; 87 88@ARGV >= 1 or usage(); 89# $arch_branches: 90# values associated with keys: 91# =1 - Arch version / git 'branch' detected via abrowse on a limit 92# >1 - Arch version / git 'branch' of an auxiliary branch we've merged 93my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV; 94 95# $branch_name_map: 96# maps arch branches to git branch names 97my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV; 98 99$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls: 100my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1); 101$opt_v && print "+ Using $tmp as temporary directory\n"; 102 103unless (-d $git_dir) { # initial import needs empty directory 104 opendir DIR, '.' or die "Unable to open current directory: $!\n"; 105 while (my $entry = readdir DIR) { 106 $entry =~ /^\.\.?$/ or 107 die "Initial import needs an empty current working directory.\n" 108 } 109 closedir DIR 110} 111 112my $default_archive; # default Arch archive 113my %reachable = (); # Arch repositories we can access 114my %unreachable = (); # Arch repositories we can't access :< 115my @psets = (); # the collection 116my %psets = (); # the collection, by name 117my %stats = ( # Track which strategy we used to import: 118 get_tag => 0, replay => 0, get_new => 0, get_delta => 0, 119 simple_changeset => 0, import_or_tag => 0 120); 121 122my %rptags = (); # my reverse private tags 123 # to map a SHA1 to a commitid 124my $TLA = $ENV{'ARCH_CLIENT'} || 'tla'; 125 126sub do_abrowse { 127 my $stage = shift; 128 while (my ($limit, $level) = each %arch_branches) { 129 next unless $level == $stage; 130 131 open ABROWSE, "$TLA abrowse -fkD --merges $limit |" 132 or die "Problems with tla abrowse: $!"; 133 134 my %ps = (); # the current one 135 my $lastseen = ''; 136 137 while (<ABROWSE>) { 138 chomp; 139 140 # first record padded w 8 spaces 141 if (s/^\s{8}\b//) { 142 my ($id, $type) = split(m/\s+/, $_, 2); 143 144 my %last_ps; 145 # store the record we just captured 146 if (%ps && !exists $psets{ $ps{id} }) { 147 %last_ps = %ps; # break references 148 push (@psets, \%last_ps); 149 $psets{ $last_ps{id} } = \%last_ps; 150 } 151 152 my $branch = extract_versionname($id); 153 %ps = ( id => $id, branch => $branch ); 154 if (%last_ps && ($last_ps{branch} eq $branch)) { 155 $ps{parent_id} = $last_ps{id}; 156 } 157 158 $arch_branches{$branch} = 1; 159 $lastseen = 'id'; 160 161 # deal with types (should work with baz or tla): 162 if ($type =~ m/\(.*changeset\)/) { 163 $ps{type} = 's'; 164 } elsif ($type =~ /\(.*import\)/) { 165 $ps{type} = 'i'; 166 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) { 167 $ps{type} = 't'; 168 # read which revision we've tagged when we parse the log 169 $ps{tag} = $1; 170 } else { 171 warn "Unknown type $type"; 172 } 173 174 $arch_branches{$branch} = 1; 175 $lastseen = 'id'; 176 } elsif (s/^\s{10}//) { 177 # 10 leading spaces or more 178 # indicate commit metadata 179 180 # date 181 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){ 182 $ps{date} = $1; 183 $lastseen = 'date'; 184 } elsif ($_ eq 'merges in:') { 185 $ps{merges} = []; 186 $lastseen = 'merges'; 187 } elsif ($lastseen eq 'merges' && s/^\s{2}//) { 188 my $id = $_; 189 push (@{$ps{merges}}, $id); 190 191 # aggressive branch finding: 192 if ($opt_D) { 193 my $branch = extract_versionname($id); 194 my $repo = extract_reponame($branch); 195 196 if (archive_reachable($repo) && 197 !defined $arch_branches{$branch}) { 198 $arch_branches{$branch} = $stage + 1; 199 } 200 } 201 } else { 202 warn "more metadata after merges!?: $_\n" unless /^\s*$/; 203 } 204 } 205 } 206 207 if (%ps && !exists $psets{ $ps{id} }) { 208 my %temp = %ps; # break references 209 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) { 210 $temp{parent_id} = $psets[$#psets]{id}; 211 } 212 push (@psets, \%temp); 213 $psets{ $temp{id} } = \%temp; 214 } 215 216 close ABROWSE or die "$TLA abrowse failed on $limit\n"; 217 } 218} # end foreach $root 219 220do_abrowse(1); 221my $depth = 2; 222$opt_D ||= 0; 223while ($depth <= $opt_D) { 224 do_abrowse($depth); 225 $depth++; 226} 227 228## Order patches by time 229# FIXME see if we can find a more optimal way to do this by graphing 230# the ancestry data and walking it, that way we won't have to rely on 231# client-supplied dates 232@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets; 233 234#print Dumper \@psets; 235 236## 237## TODO cleanup irrelevant patches 238## and put an initial import 239## or a full tag 240my $import = 0; 241unless (-d $git_dir) { # initial import 242 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') { 243 print "Starting import from $psets[0]{id}\n"; 244 `git-init`; 245 die $! if $?; 246 $import = 1; 247 } else { 248 die "Need to start from an import or a tag -- cannot use $psets[0]{id}"; 249 } 250} else { # progressing an import 251 # load the rptags 252 opendir(DIR, $ptag_dir) 253 || die "can't opendir: $!"; 254 while (my $file = readdir(DIR)) { 255 # skip non-interesting-files 256 next unless -f "$ptag_dir/$file"; 257 258 # convert first '--' to '/' from old git-archimport to use 259 # as an archivename/c--b--v private tag 260 if ($file !~ m!,!) { 261 my $oldfile = $file; 262 $file =~ s!--!,!; 263 print STDERR "converting old tag $oldfile to $file\n"; 264 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!; 265 } 266 my $sha = ptag($file); 267 chomp $sha; 268 $rptags{$sha} = $file; 269 } 270 closedir DIR; 271} 272 273# process patchsets 274# extract the Arch repository name (Arch "archive" in Arch-speak) 275sub extract_reponame { 276 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision] 277 return (split(/\//, $fq_cvbr))[0]; 278} 279 280sub extract_versionname { 281 my $name = shift; 282 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//; 283 return $name; 284} 285 286# convert a fully-qualified revision or version to a unique dirname: 287# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 288# becomes: normalperson@yhbt.net-05,mpd--uclinux--1 289# 290# the git notion of a branch is closer to 291# archive/category--branch--version than archive/category--branch, so we 292# use this to convert to git branch names. 293# Also, keep archive names but replace '/' with ',' since it won't require 294# subdirectories, and is safer than swapping '--' which could confuse 295# reverse-mapping when dealing with bastard branches that 296# are just archive/category--version (no --branch) 297sub tree_dirname { 298 my $revision = shift; 299 my $name = extract_versionname($revision); 300 $name =~ s#/#,#; 301 return $name; 302} 303 304# old versions of git-archimport just use the <category--branch> part: 305sub old_style_branchname { 306 my $id = shift; 307 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id); 308 chomp $ret; 309 return $ret; 310} 311 312*git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname; 313 314# retrieve default archive, since $branch_name_map keys might not include it 315sub get_default_archive { 316 if (!defined $default_archive) { 317 $default_archive = safe_pipe_capture($TLA,'my-default-archive'); 318 chomp $default_archive; 319 } 320 return $default_archive; 321} 322 323sub git_branchname { 324 my $revision = shift; 325 my $name = extract_versionname($revision); 326 327 if (exists $branch_name_map{$name}) { 328 return $branch_name_map{$name}; 329 330 } elsif ($name =~ m#^([^/]*)/(.*)$# 331 && $1 eq get_default_archive() 332 && exists $branch_name_map{$2}) { 333 # the names given in the command-line lacked the archive. 334 return $branch_name_map{$2}; 335 336 } else { 337 return git_default_branchname($revision); 338 } 339} 340 341sub process_patchset_accurate { 342 my $ps = shift; 343 344 # switch to that branch if we're not already in that branch: 345 if (-e "$git_dir/refs/heads/$ps->{branch}") { 346 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n"; 347 348 # remove any old stuff that got leftover: 349 my $rm = safe_pipe_capture('git-ls-files','--others','-z'); 350 rmtree(split(/\0/,$rm)) if $rm; 351 } 352 353 # Apply the import/changeset/merge into the working tree 354 my $dir = sync_to_ps($ps); 355 # read the new log entry: 356 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id}); 357 die "Error in cat-log: $!" if $?; 358 chomp @commitlog; 359 360 # grab variables we want from the log, new fields get added to $ps: 361 # (author, date, email, summary, message body ...) 362 parselog($ps, \@commitlog); 363 364 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) { 365 # this should work when importing continuations 366 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) { 367 368 # find where we are supposed to branch from 369 if (! -e "$git_dir/refs/heads/$ps->{branch}") { 370 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n"; 371 372 # We trust Arch with the fact that this is just a tag, 373 # and it does not affect the state of the tree, so 374 # we just tag and move on. If the user really wants us 375 # to consolidate more branches into one, don't tag because 376 # the tag name would be already taken. 377 tag($ps->{id}, $branchpoint); 378 ptag($ps->{id}, $branchpoint); 379 print " * Tagged $ps->{id} at $branchpoint\n"; 380 } 381 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n"; 382 383 # remove any old stuff that got leftover: 384 my $rm = safe_pipe_capture('git-ls-files','--others','-z'); 385 rmtree(split(/\0/,$rm)) if $rm; 386 return 0; 387 } else { 388 warn "Tagging from unknown id unsupported\n" if $ps->{tag}; 389 } 390 # allow multiple bases/imports here since Arch supports cherry-picks 391 # from unrelated trees 392 } 393 394 # update the index with all the changes we got 395 system('git-diff-files --name-only -z | '. 396 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n"; 397 system('git-ls-files --others -z | '. 398 'git-update-index --add -z --stdin') == 0 or die "$! $?\n"; 399 return 1; 400} 401 402# the native changeset processing strategy. This is very fast, but 403# does not handle permissions or any renames involving directories 404sub process_patchset_fast { 405 my $ps = shift; 406 # 407 # create the branch if needed 408 # 409 if ($ps->{type} eq 'i' && !$import) { 410 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}"; 411 } 412 413 unless ($import) { # skip for import 414 if ( -e "$git_dir/refs/heads/$ps->{branch}") { 415 # we know about this branch 416 system('git-checkout',$ps->{branch}); 417 } else { 418 # new branch! we need to verify a few things 419 die "Branch on a non-tag!" unless $ps->{type} eq 't'; 420 my $branchpoint = ptag($ps->{tag}); 421 die "Tagging from unknown id unsupported: $ps->{tag}" 422 unless $branchpoint; 423 424 # find where we are supposed to branch from 425 if (! -e "$git_dir/refs/heads/$ps->{branch}") { 426 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n"; 427 428 # We trust Arch with the fact that this is just a tag, 429 # and it does not affect the state of the tree, so 430 # we just tag and move on. If the user really wants us 431 # to consolidate more branches into one, don't tag because 432 # the tag name would be already taken. 433 tag($ps->{id}, $branchpoint); 434 ptag($ps->{id}, $branchpoint); 435 print " * Tagged $ps->{id} at $branchpoint\n"; 436 } 437 system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n"; 438 return 0; 439 } 440 die $! if $?; 441 } 442 443 # 444 # Apply the import/changeset/merge into the working tree 445 # 446 if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 447 apply_import($ps) or die $!; 448 $stats{import_or_tag}++; 449 $import=0; 450 } elsif ($ps->{type} eq 's') { 451 apply_cset($ps); 452 $stats{simple_changeset}++; 453 } 454 455 # 456 # prepare update git's index, based on what arch knows 457 # about the pset, resolve parents, etc 458 # 459 460 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 461 die "Error in cat-archive-log: $!" if $?; 462 463 parselog($ps,\@commitlog); 464 465 # imports don't give us good info 466 # on added files. Shame on them 467 if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 468 system('git-ls-files --deleted -z | '. 469 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n"; 470 system('git-ls-files --others -z | '. 471 'git-update-index --add -z --stdin') == 0 or die "$! $?\n"; 472 } 473 474 # TODO: handle removed_directories and renamed_directories: 475 476 if (my $del = $ps->{removed_files}) { 477 unlink @$del; 478 while (@$del) { 479 my @slice = splice(@$del, 0, 100); 480 system('git-update-index','--remove','--',@slice) == 0 or 481 die "Error in git-update-index --remove: $! $?\n"; 482 } 483 } 484 485 if (my $ren = $ps->{renamed_files}) { # renamed 486 if (@$ren % 2) { 487 die "Odd number of entries in rename!?"; 488 } 489 490 while (@$ren) { 491 my $from = shift @$ren; 492 my $to = shift @$ren; 493 494 unless (-d dirname($to)) { 495 mkpath(dirname($to)); # will die on err 496 } 497 # print "moving $from $to"; 498 rename($from, $to) or die "Error renaming '$from' '$to': $!\n"; 499 system('git-update-index','--remove','--',$from) == 0 or 500 die "Error in git-update-index --remove: $! $?\n"; 501 system('git-update-index','--add','--',$to) == 0 or 502 die "Error in git-update-index --add: $! $?\n"; 503 } 504 } 505 506 if (my $add = $ps->{new_files}) { 507 while (@$add) { 508 my @slice = splice(@$add, 0, 100); 509 system('git-update-index','--add','--',@slice) == 0 or 510 die "Error in git-update-index --add: $! $?\n"; 511 } 512 } 513 514 if (my $mod = $ps->{modified_files}) { 515 while (@$mod) { 516 my @slice = splice(@$mod, 0, 100); 517 system('git-update-index','--',@slice) == 0 or 518 die "Error in git-update-index: $! $?\n"; 519 } 520 } 521 return 1; # we successfully applied the changeset 522} 523 524if ($opt_f) { 525 print "Will import patchsets using the fast strategy\n", 526 "Renamed directories and permission changes will be missed\n"; 527 *process_patchset = *process_patchset_fast; 528} else { 529 print "Using the default (accurate) import strategy.\n", 530 "Things may be a bit slow\n"; 531 *process_patchset = *process_patchset_accurate; 532} 533 534foreach my $ps (@psets) { 535 # process patchsets 536 $ps->{branch} = git_branchname($ps->{id}); 537 538 # 539 # ensure we have a clean state 540 # 541 if (my $dirty = `git-diff-files`) { 542 die "Unclean tree when about to process $ps->{id} " . 543 " - did we fail to commit cleanly before?\n$dirty"; 544 } 545 die $! if $?; 546 547 # 548 # skip commits already in repo 549 # 550 if (ptag($ps->{id})) { 551 $opt_v && print " * Skipping already imported: $ps->{id}\n"; 552 next; 553 } 554 555 print " * Starting to work on $ps->{id}\n"; 556 557 process_patchset($ps) or next; 558 559 # warn "errors when running git-update-index! $!"; 560 my $tree = `git-write-tree`; 561 die "cannot write tree $!" if $?; 562 chomp $tree; 563 564 # 565 # Who's your daddy? 566 # 567 my @par; 568 if ( -e "$git_dir/refs/heads/$ps->{branch}") { 569 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") { 570 my $p = <HEAD>; 571 close HEAD; 572 chomp $p; 573 push @par, '-p', $p; 574 } else { 575 if ($ps->{type} eq 's') { 576 warn "Could not find the right head for the branch $ps->{branch}"; 577 } 578 } 579 } 580 581 if ($ps->{merges}) { 582 push @par, find_parents($ps); 583 } 584 585 # 586 # Commit, tag and clean state 587 # 588 $ENV{TZ} = 'GMT'; 589 $ENV{GIT_AUTHOR_NAME} = $ps->{author}; 590 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email}; 591 $ENV{GIT_AUTHOR_DATE} = $ps->{date}; 592 $ENV{GIT_COMMITTER_NAME} = $ps->{author}; 593 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email}; 594 $ENV{GIT_COMMITTER_DATE} = $ps->{date}; 595 596 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 597 or die $!; 598 print WRITER $ps->{summary},"\n\n"; 599 600 # only print message if it's not empty, to avoid a spurious blank line; 601 # also append an extra newline, so there's a blank line before the 602 # following "git-archimport-id:" line. 603 print WRITER $ps->{message},"\n\n" if ($ps->{message} ne ""); 604 605 # make it easy to backtrack and figure out which Arch revision this was: 606 print WRITER 'git-archimport-id: ',$ps->{id},"\n"; 607 608 close WRITER; 609 my $commitid = <READER>; # read 610 chomp $commitid; 611 close READER; 612 waitpid $pid,0; # close; 613 614 if (length $commitid != 40) { 615 die "Something went wrong with the commit! $! $commitid"; 616 } 617 # 618 # Update the branch 619 # 620 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}"; 621 print HEAD $commitid; 622 close HEAD; 623 system('git-update-ref', 'HEAD', "$ps->{branch}"); 624 625 # tag accordingly 626 ptag($ps->{id}, $commitid); # private tag 627 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') { 628 tag($ps->{id}, $commitid); 629 } 630 print " * Committed $ps->{id}\n"; 631 print " + tree $tree\n"; 632 print " + commit $commitid\n"; 633 $opt_v && print " + commit date is $ps->{date} \n"; 634 $opt_v && print " + parents: ",join(' ',@par),"\n"; 635} 636 637if ($opt_v) { 638 foreach (sort keys %stats) { 639 print" $_: $stats{$_}\n"; 640 } 641} 642exit 0; 643 644# used by the accurate strategy: 645sub sync_to_ps { 646 my $ps = shift; 647 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id}); 648 649 $opt_v && print "sync_to_ps($ps->{id}) method: "; 650 651 if (-d $tree_dir) { 652 if ($ps->{type} eq 't') { 653 $opt_v && print "get (tag)\n"; 654 # looks like a tag-only or (worse,) a mixed tags/changeset branch, 655 # can't rely on replay to work correctly on these 656 rmtree($tree_dir); 657 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 658 $stats{get_tag}++; 659 } else { 660 my $tree_id = arch_tree_id($tree_dir); 661 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) { 662 # the common case (hopefully) 663 $opt_v && print "replay\n"; 664 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id}); 665 $stats{replay}++; 666 } else { 667 # getting one tree is usually faster than getting two trees 668 # and applying the delta ... 669 rmtree($tree_dir); 670 $opt_v && print "apply-delta\n"; 671 safe_pipe_capture($TLA,'get','--no-pristine', 672 $ps->{id},$tree_dir); 673 $stats{get_delta}++; 674 } 675 } 676 } else { 677 # new branch work 678 $opt_v && print "get (new tree)\n"; 679 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 680 $stats{get_new}++; 681 } 682 683 # added -I flag to rsync since we're going to fast! AIEEEEE!!!! 684 system('rsync','-aI','--delete','--exclude',$git_dir, 685# '--exclude','.arch-inventory', 686 '--exclude','.arch-ids','--exclude','{arch}', 687 '--exclude','+*','--exclude',',*', 688 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?"; 689 return $tree_dir; 690} 691 692sub apply_import { 693 my $ps = shift; 694 my $bname = git_branchname($ps->{id}); 695 696 mkpath($tmp); 697 698 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import"); 699 die "Cannot get import: $!" if $?; 700 system('rsync','-aI','--delete', '--exclude',$git_dir, 701 '--exclude','.arch-ids','--exclude','{arch}', 702 "$tmp/import/", './'); 703 die "Cannot rsync import:$!" if $?; 704 705 rmtree("$tmp/import"); 706 die "Cannot remove tempdir: $!" if $?; 707 708 709 return 1; 710} 711 712sub apply_cset { 713 my $ps = shift; 714 715 mkpath($tmp); 716 717 # get the changeset 718 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset"); 719 die "Cannot get changeset: $!" if $?; 720 721 # apply patches 722 if (`find $tmp/changeset/patches -type f -name '*.patch'`) { 723 # this can be sped up considerably by doing 724 # (find | xargs cat) | patch 725 # but that can get mucked up by patches 726 # with missing trailing newlines or the standard 727 # 'missing newline' flag in the patch - possibly 728 # produced with an old/buggy diff. 729 # slow and safe, we invoke patch once per patchfile 730 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`; 731 die "Problem applying patches! $!" if $?; 732 } 733 734 # apply changed binary files 735 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) { 736 foreach my $mod (@modified) { 737 chomp $mod; 738 my $orig = $mod; 739 $orig =~ s/\.modified$//; # lazy 740 $orig =~ s!^\Q$tmp\E/changeset/patches/!!; 741 #print "rsync -p '$mod' '$orig'"; 742 system('rsync','-p',$mod,"./$orig"); 743 die "Problem applying binary changes! $!" if $?; 744 } 745 } 746 747 # bring in new files 748 system('rsync','-aI','--exclude',$git_dir, 749 '--exclude','.arch-ids', 750 '--exclude', '{arch}', 751 "$tmp/changeset/new-files-archive/",'./'); 752 753 # deleted files are hinted from the commitlog processing 754 755 rmtree("$tmp/changeset"); 756} 757 758 759# =for reference 760# notes: *-files/-directories keys cannot have spaces, they're always 761# pika-escaped. Everything after the first newline 762# A log entry looks like: 763# Revision: moodle-org--moodle--1.3.3--patch-15 764# Archive: arch-eduforge@catalyst.net.nz--2004 765# Creator: Penny Leach <penny@catalyst.net.nz> 766# Date: Wed May 25 14:15:34 NZST 2005 767# Standard-date: 2005-05-25 02:15:34 GMT 768# New-files: lang/de/.arch-ids/block_glossary_random.php.id 769# lang/de/.arch-ids/block_html.php.id 770# New-directories: lang/de/help/questionnaire 771# lang/de/help/questionnaire/.arch-ids 772# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id 773# db_sears.sql db/db_sears.sql 774# Removed-files: lang/be/docs/.arch-ids/release.html.id 775# lang/be/docs/.arch-ids/releaseold.html.id 776# Modified-files: admin/cron.php admin/delete.php 777# admin/editor.html backup/lib.php backup/restore.php 778# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15 779# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+) 780# summary can be multiline with a leading space just like the above fields 781# Keywords: 782# 783# Updating yadda tadda tadda madda 784sub parselog { 785 my ($ps, $log) = @_; 786 my $key = undef; 787 788 # headers we want that contain filenames: 789 my %want_headers = ( 790 new_files => 1, 791 modified_files => 1, 792 renamed_files => 1, 793 renamed_directories => 1, 794 removed_files => 1, 795 removed_directories => 1, 796 ); 797 798 chomp (@$log); 799 while ($_ = shift @$log) { 800 if (/^Continuation-of:\s*(.*)/) { 801 $ps->{tag} = $1; 802 $key = undef; 803 } elsif (/^Summary:\s*(.*)$/ ) { 804 # summary can be multiline as long as it has a leading space. 805 # we squeeze it onto a single line, though. 806 $ps->{summary} = [ $1 ]; 807 $key = 'summary'; 808 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) { 809 $ps->{author} = $1; 810 $ps->{email} = $2; 811 $key = undef; 812 # any *-files or *-directories can be read here: 813 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) { 814 my $val = $2; 815 $key = lc $1; 816 $key =~ tr/-/_/; # too lazy to quote :P 817 if ($want_headers{$key}) { 818 push @{$ps->{$key}}, split(/\s+/, $val); 819 } else { 820 $key = undef; 821 } 822 } elsif (/^$/) { 823 last; # remainder of @$log that didn't get shifted off is message 824 } elsif ($key) { 825 if (/^\s+(.*)$/) { 826 if ($key eq 'summary') { 827 push @{$ps->{$key}}, $1; 828 } else { # files/directories: 829 push @{$ps->{$key}}, split(/\s+/, $1); 830 } 831 } else { 832 $key = undef; 833 } 834 } 835 } 836 837 # drop leading empty lines from the log message 838 while (@$log && $log->[0] eq '') { 839 shift @$log; 840 } 841 if (exists $ps->{summary} && @{$ps->{summary}}) { 842 $ps->{summary} = join(' ', @{$ps->{summary}}); 843 } 844 elsif (@$log == 0) { 845 $ps->{summary} = 'empty commit message'; 846 } else { 847 $ps->{summary} = $log->[0] . '...'; 848 } 849 $ps->{message} = join("\n",@$log); 850 851 # skip Arch control files, unescape pika-escaped files 852 foreach my $k (keys %want_headers) { 853 next unless (defined $ps->{$k}); 854 my @tmp = (); 855 foreach my $t (@{$ps->{$k}}) { 856 next unless length ($t); 857 next if $t =~ m!\{arch\}/!; 858 next if $t =~ m!\.arch-ids/!; 859 # should we skip this? 860 next if $t =~ m!\.arch-inventory$!; 861 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why? 862 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of. 863 if ($t =~ /\\/ ){ 864 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0]; 865 } 866 push @tmp, $t; 867 } 868 $ps->{$k} = \@tmp; 869 } 870} 871 872# write/read a tag 873sub tag { 874 my ($tag, $commit) = @_; 875 876 if ($opt_o) { 877 $tag =~ s|/|--|g; 878 } else { 879 my $patchname = $tag; 880 $patchname =~ s/.*--//; 881 $tag = git_branchname ($tag) . '--' . $patchname; 882 } 883 884 if ($commit) { 885 open(C,">","$git_dir/refs/tags/$tag") 886 or die "Cannot create tag $tag: $!\n"; 887 print C "$commit\n" 888 or die "Cannot write tag $tag: $!\n"; 889 close(C) 890 or die "Cannot write tag $tag: $!\n"; 891 print " * Created tag '$tag' on '$commit'\n" if $opt_v; 892 } else { # read 893 open(C,"<","$git_dir/refs/tags/$tag") 894 or die "Cannot read tag $tag: $!\n"; 895 $commit = <C>; 896 chomp $commit; 897 die "Error reading tag $tag: $!\n" unless length $commit == 40; 898 close(C) 899 or die "Cannot read tag $tag: $!\n"; 900 return $commit; 901 } 902} 903 904# write/read a private tag 905# reads fail softly if the tag isn't there 906sub ptag { 907 my ($tag, $commit) = @_; 908 909 # don't use subdirs for tags yet, it could screw up other porcelains 910 $tag =~ s|/|,|g; 911 912 my $tag_file = "$ptag_dir/$tag"; 913 my $tag_branch_dir = dirname($tag_file); 914 mkpath($tag_branch_dir) unless (-d $tag_branch_dir); 915 916 if ($commit) { # write 917 open(C,">",$tag_file) 918 or die "Cannot create tag $tag: $!\n"; 919 print C "$commit\n" 920 or die "Cannot write tag $tag: $!\n"; 921 close(C) 922 or die "Cannot write tag $tag: $!\n"; 923 $rptags{$commit} = $tag 924 unless $tag =~ m/--base-0$/; 925 } else { # read 926 # if the tag isn't there, return 0 927 unless ( -s $tag_file) { 928 return 0; 929 } 930 open(C,"<",$tag_file) 931 or die "Cannot read tag $tag: $!\n"; 932 $commit = <C>; 933 chomp $commit; 934 die "Error reading tag $tag: $!\n" unless length $commit == 40; 935 close(C) 936 or die "Cannot read tag $tag: $!\n"; 937 unless (defined $rptags{$commit}) { 938 $rptags{$commit} = $tag; 939 } 940 return $commit; 941 } 942} 943 944sub find_parents { 945 # 946 # Identify what branches are merging into me 947 # and whether we are fully merged 948 # git-merge-base <headsha> <headsha> should tell 949 # me what the base of the merge should be 950 # 951 my $ps = shift; 952 953 my %branches; # holds an arrayref per branch 954 # the arrayref contains a list of 955 # merged patches between the base 956 # of the merge and the current head 957 958 my @parents; # parents found for this commit 959 960 # simple loop to split the merges 961 # per branch 962 foreach my $merge (@{$ps->{merges}}) { 963 my $branch = git_branchname($merge); 964 unless (defined $branches{$branch} ){ 965 $branches{$branch} = []; 966 } 967 push @{$branches{$branch}}, $merge; 968 } 969 970 # 971 # foreach branch find a merge base and walk it to the 972 # head where we are, collecting the merged patchsets that 973 # Arch has recorded. Keep that in @have 974 # Compare that with the commits on the other branch 975 # between merge-base and the tip of the branch (@need) 976 # and see if we have a series of consecutive patches 977 # starting from the merge base. The tip of the series 978 # of consecutive patches merged is our new parent for 979 # that branch. 980 # 981 foreach my $branch (keys %branches) { 982 983 # check that we actually know about the branch 984 next unless -e "$git_dir/refs/heads/$branch"; 985 986 my $mergebase = safe_pipe_capture(qw(git-merge-base), $branch, $ps->{branch}); 987 if ($?) { 988 # Don't die here, Arch supports one-way cherry-picking 989 # between branches with no common base (or any relationship 990 # at all beforehand) 991 warn "Cannot find merge base for $branch and $ps->{branch}"; 992 next; 993 } 994 chomp $mergebase; 995 996 # now walk up to the mergepoint collecting what patches we have 997 my $branchtip = git_rev_parse($ps->{branch}); 998 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`; 999 my %have; # collected merges this branch has 1000 foreach my $merge (@{$ps->{merges}}) { 1001 $have{$merge} = 1; 1002 } 1003 my %ancestorshave; 1004 foreach my $par (@ancestors) { 1005 $par = commitid2pset($par); 1006 if (defined $par->{merges}) { 1007 foreach my $merge (@{$par->{merges}}) { 1008 $ancestorshave{$merge}=1; 1009 } 1010 } 1011 } 1012 # print "++++ Merges in $ps->{id} are....\n"; 1013 # my @have = sort keys %have; print Dumper(\@have); 1014 1015 # merge what we have with what ancestors have 1016 %have = (%have, %ancestorshave); 1017 1018 # see what the remote branch has - these are the merges we 1019 # will want to have in a consecutive series from the mergebase 1020 my $otherbranchtip = git_rev_parse($branch); 1021 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`; 1022 my @need; 1023 foreach my $needps (@needraw) { # get the psets 1024 $needps = commitid2pset($needps); 1025 # git-rev-list will also 1026 # list commits merged in via earlier 1027 # merges. we are only interested in commits 1028 # from the branch we're looking at 1029 if ($branch eq $needps->{branch}) { 1030 push @need, $needps->{id}; 1031 } 1032 } 1033 1034 # print "++++ Merges from $branch we want are....\n"; 1035 # print Dumper(\@need); 1036 1037 my $newparent; 1038 while (my $needed_commit = pop @need) { 1039 if ($have{$needed_commit}) { 1040 $newparent = $needed_commit; 1041 } else { 1042 last; # break out of the while 1043 } 1044 } 1045 if ($newparent) { 1046 push @parents, $newparent; 1047 } 1048 1049 1050 } # end foreach branch 1051 1052 # prune redundant parents 1053 my %parents; 1054 foreach my $p (@parents) { 1055 $parents{$p} = 1; 1056 } 1057 foreach my $p (@parents) { 1058 next unless exists $psets{$p}{merges}; 1059 next unless ref $psets{$p}{merges}; 1060 my @merges = @{$psets{$p}{merges}}; 1061 foreach my $merge (@merges) { 1062 if ($parents{$merge}) { 1063 delete $parents{$merge}; 1064 } 1065 } 1066 } 1067 1068 @parents = (); 1069 foreach (keys %parents) { 1070 push @parents, '-p', ptag($_); 1071 } 1072 return @parents; 1073} 1074 1075sub git_rev_parse { 1076 my $name = shift; 1077 my $val = safe_pipe_capture(qw(git-rev-parse), $name); 1078 die "Error: git-rev-parse $name" if $?; 1079 chomp $val; 1080 return $val; 1081} 1082 1083# resolve a SHA1 to a known patchset 1084sub commitid2pset { 1085 my $commitid = shift; 1086 chomp $commitid; 1087 my $name = $rptags{$commitid} 1088 || die "Cannot find reverse tag mapping for $commitid"; 1089 $name =~ s|,|/|; 1090 my $ps = $psets{$name} 1091 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name"; 1092 return $ps; 1093} 1094 1095 1096# an alternative to `command` that allows input to be passed as an array 1097# to work around shell problems with weird characters in arguments 1098sub safe_pipe_capture { 1099 my @output; 1100 if (my $pid = open my $child, '-|') { 1101 @output = (<$child>); 1102 close $child or die join(' ',@_).": $! $?"; 1103 } else { 1104 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found 1105 } 1106 return wantarray ? @output : join('',@output); 1107} 1108 1109# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>` 1110sub arch_tree_id { 1111 my $dir = shift; 1112 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] ); 1113 return $ret; 1114} 1115 1116sub archive_reachable { 1117 my $archive = shift; 1118 return 1 if $reachable{$archive}; 1119 return 0 if $unreachable{$archive}; 1120 1121 if (system "$TLA whereis-archive $archive >/dev/null") { 1122 if ($opt_a && (system($TLA,'register-archive', 1123 "http://mirrors.sourcecontrol.net/$archive") == 0)) { 1124 $reachable{$archive} = 1; 1125 return 1; 1126 } 1127 print STDERR "Archive is unreachable: $archive\n"; 1128 $unreachable{$archive} = 1; 1129 return 0; 1130 } else { 1131 $reachable{$archive} = 1; 1132 return 1; 1133 } 1134} 1135