1#!/usr/bin/perl 2# 3# debcheckout: checkout the development repository of a Debian package 4# Copyright (C) 2007-2009 Stefano Zacchiroli <zack@debian.org> 5# Copyright (C) 2010 Christoph Berg <myon@debian.org> 6# 7# This program is free software: you can redistribute it and/or modify 8# it under the terms of the GNU General Public License as published by 9# the Free Software Foundation, either version 3 of the License, or 10# (at your option) any later version. 11# 12# This program is distributed in the hope that it will be useful, 13# but WITHOUT ANY WARRANTY; without even the implied warranty of 14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15# GNU General Public License for more details. 16# 17# You should have received a copy of the GNU General Public License 18# along with this program. If not, see <https://www.gnu.org/licenses/>. 19# 20 21# Created: Tue, 14 Aug 2007 10:20:55 +0200 22# Last-Modified: $Date$ 23 24=head1 NAME 25 26debcheckout - checkout the development repository of a Debian package 27 28=head1 SYNOPSIS 29 30=over 31 32=item B<debcheckout> [I<OPTIONS>] I<PACKAGE> [I<DESTDIR>] 33 34=item B<debcheckout> [I<OPTIONS>] I<REPOSITORY_URL> [I<DESTDIR>] 35 36=item B<debcheckout> B<--help> 37 38=back 39 40=head1 DESCRIPTION 41 42B<debcheckout> retrieves the information about the Version Control System used 43to maintain a given Debian package (the I<PACKAGE> argument), and then checks 44out the latest (potentially unreleased) version of the package from its 45repository. By default the repository is checked out to the I<PACKAGE> 46directory; this can be overridden by providing the I<DESTDIR> argument. 47 48The information about where the repository is available is expected to be found 49in B<Vcs-*> fields available in the source package record. For example, the B<vim> 50package exposes such information with a field like S<B<Vcs-Hg: 51http://hg.debian.org/hg/pkg-vim/vim>>, you can see it by grepping through 52B<apt-cache showsrc vim>. 53 54If more than one source package record containing B<Vcs-*> fields is available, 55B<debcheckout> will select the record with the highest version number. 56Alternatively, a particular version may be selected from those available by 57specifying the package name as I<PACKAGE>=I<VERSION>. 58 59If you already know the URL of a given repository you can invoke 60B<debcheckout> directly on it, but you will probably need to pass the 61appropriate B<-t> flag. That is, some heuristics are in use to guess 62the repository type from the URL; if they fail, you might want to 63override the guessed type using B<-t>. 64 65The currently supported version control systems are: Arch (arch), Bazaar (bzr), CVS (cvs), 66Darcs (darcs), Git (git), Mercurial (hg) and Subversion (svn). 67 68=head1 OPTIONS 69 70B<GENERAL OPTIONS> 71 72=over 73 74=item B<-a>, B<--auth> 75 76Work in authenticated mode; this means that for known repositories (mainly those 77hosted on S<I<https://salsa.debian.org>>) URL rewriting is attempted before 78checking out, to ensure that the repository can be committed to. For example, 79for Git repositories hosted on Salsa this means that 80S<I<git@salsa.debian.org:...git>> will be used instead of 81S<I<https://salsa.debian.org/...git>>. 82 83There are built-in rules for salsa.debian.org, alioth.debian.org and github.com. Other hosts 84can be configured using B<DEBCHECKOUT_AUTH_URLS>. 85 86=item B<-d>, B<--details> 87 88Only print a list of detailed information about the package 89repository, without checking it out; the output format is a list of 90fields, each field being a pair of TAB-separated field name and field 91value. The actual fields depend on the repository type. This action 92might require a network connection to the remote repository. 93 94Also see B<-p>. This option and B<-p> are mutually exclusive. 95 96=item B<-h>, B<--help> 97 98Print a detailed help message and exit. 99 100=item B<-p>, B<--print> 101 102Only print a summary about package repository information, without 103checking it out; the output format is TAB-separated with two fields: 104repository type, repository URL. This action works offline, it only 105uses "static" information as known by APT's cache. 106 107Also see B<-d>. This option and B<-d> are mutually exclusive. 108 109=item B<-P> I<package>, B<--package> I<package> 110 111When checking out a repository URL, instead of trying to guess the package name 112from the URL, use this package name. 113 114=item B<-t> I<TYPE>, B<--type> I<TYPE> 115 116Override the repository type (which defaults to some heuristics based 117on the URL or, in case of heuristic failure, the fallback "git"); 118should be one of the currently supported repository types. 119 120=item B<-u> I<USERNAME>, B<--user> I<USERNAME> 121 122Specify the login name to be used in authenticated mode (see B<-a>). This option 123implies B<-a>: you don't need to specify both. 124 125=item B<-f> I<FILE>, B<--file> I<FILE> 126 127Specify that the named file should be extracted from the repository and placed 128in the destination directory. May be used more than once to extract multiple 129files. 130 131=item B<--source=never>|B<auto>|B<download-only>|B<always> 132 133Some packages only place the F<debian> directory in version control. 134B<debcheckout> can retrieve the remaining parts of the source using B<apt-get 135source> and move the files into the checkout. 136 137=over 138 139=item B<never> 140 141Only use the repository. 142 143=item B<auto> (default) 144 145If the repository only contains the F<debian> directory, retrieve the source 146package, unpack it, and also place the F<.orig.tar.gz> file into the current 147directory. Else, do nothing. 148 149=item B<download-only> 150 151Always retrieve the I<.orig.tar.gz> file, but do not unpack it. 152 153=item B<always> 154 155Always retrieve the I<.orig.tar.gz> file, and if the repository only contains the 156F<debian> directory, unpack it. 157 158=back 159 160=back 161 162B<VCS-SPECIFIC OPTIONS> 163 164I<GIT-SPECIFIC OPTIONS> 165 166=over 167 168=item B<--git-track> I<BRANCHES> 169 170Specify a list of remote branches which will be set up for tracking 171(as in S<B<git branch --track>>, see B<git-branch>(1)) after the remote 172Git repository has been cloned. The list should be given as a 173space-separated list of branch names. 174 175As a shorthand, the string "B<*>" can be given to require tracking of all 176remote branches. 177 178=back 179 180=head1 CONFIGURATION VARIABLES 181 182The two configuration files F</etc/devscripts.conf> and 183F<~/.devscripts> are sourced by a shell in that order to set 184configuration variables. Command line options can be used to override 185configuration file settings. Environment variable settings are ignored 186for this purpose. The currently recognised variables are: 187 188=over 189 190=item B<DEBCHECKOUT_AUTH_URLS> 191 192This variable should be a space separated list of Perl regular 193expressions and replacement texts, which must come in pairs: I<REGEXP> 194I<TEXT> I<REGEXP> I<TEXT> ... and so on. Each pair denotes a substitution which 195is applied to repository URLs if other built-in means of building URLs 196for authenticated mode (see B<-a>) have failed. 197 198References to matching substrings in the replacement texts are 199allowed as usual in Perl by the means of B<$1>, B<$2>, ... and so on. 200 201This setting is used to configure the "authenticated mode" location for 202repositories. The Debian repositories on S<salsa.debian.org> are implicitly 203defined, as is S<github.com>. 204 205Here is a sample snippet suitable for the configuration files: 206 207 DEBCHECKOUT_AUTH_URLS=' 208 ^\w+://(svn\.example\.com)/(.*) svn+ssh://$1/srv/svn/$2 209 ^\w+://(git\.example\.com)/(.*) git+ssh://$1/home/git/$2 210 ' 211 212Note that whitespace is not allowed in either regexps or 213replacement texts. Also, given that configuration files are sourced by 214a shell, you probably want to use single quotes around the value of 215this variable. 216 217=item B<DEBCHECKOUT_SOURCE> 218 219This variable determines under what scenarios the associated orig.tar.gz for a 220package will be downloaded. See the B<--source> option for a description of 221the values. 222 223=item B<DEBCHECKOUT_USER> 224 225This variable sets the username for authenticated mode. It can be overridden 226with the B<--user> option. Setting this variable does not imply the use of 227authenticated mode, it still has to be activated with B<--auth>. 228 229=back 230 231=head1 SEE ALSO 232 233B<apt-cache>(8), Section 6.2.5 of the Debian Developer's Reference (for 234more information about B<Vcs-*> fields): S<I<https://www.debian.org/doc/developers-reference/best-pkging-practices.html#bpp-vcs>>. 235 236=head1 AUTHOR 237 238B<debcheckout> and this manpage have been written by Stefano Zacchiroli 239<I<zack@debian.org>>. 240 241=cut 242 243use strict; 244use warnings; 245no if $] >= 5.018, 'warnings', 'experimental::smartmatch'; 246use feature 'switch'; 247use Getopt::Long qw(:config bundling permute no_getopt_compat); 248use Pod::Usage; 249use File::Basename; 250use File::Copy qw/copy/; 251use File::Temp qw/tempdir/; 252use Cwd; 253use Devscripts::Compression; 254use Devscripts::Versort; 255 256my @files = (); # files to checkout 257 258my $compression_re = compression_get_file_extension_regex(); 259 260# <snippet from="bts.pl"> 261# <!-- TODO we really need to factor out in a Perl module the 262# configuration file parsing code --> 263my @config_files = ('/etc/devscripts.conf', '~/.devscripts'); 264my %config_vars = ( 265 'DEBCHECKOUT_AUTH_URLS' => '', 266 'DEBCHECKOUT_SOURCE' => 'auto', 267 'DEBCHECKOUT_USER' => '', 268); 269my %config_default = %config_vars; 270my $shell_cmd; 271# Set defaults 272foreach my $var (keys %config_vars) { 273 $shell_cmd .= qq[$var="$config_vars{$var}";\n]; 274} 275$shell_cmd .= 'for file in ' . join(" ", @config_files) . "; do\n"; 276$shell_cmd .= '[ -f $file ] && . $file; done;' . "\n"; 277# Read back values 278foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" } 279my $shell_out = `/bin/bash -c '$shell_cmd'`; 280@config_vars{ keys %config_vars } = split /\n/, $shell_out, -1; 281# </snippet> 282 283my $lwp_broken; 284my $ua; 285 286sub have_lwp() { 287 return ($lwp_broken ? 0 : 1) if defined $lwp_broken; 288 eval { 289 require LWP; 290 require LWP::UserAgent; 291 }; 292 293 if ($@) { 294 if ($@ =~ m%^Can\'t locate LWP%) { 295 $lwp_broken = "the libwww-perl package is not installed"; 296 } else { 297 $lwp_broken = "couldn't load LWP::UserAgent: $@"; 298 } 299 } else { 300 $lwp_broken = ''; 301 } 302 return $lwp_broken ? 0 : 1; 303} 304 305sub init_agent { 306 $ua = new LWP::UserAgent; # we create a global UserAgent object 307 $ua->agent("LWP::UserAgent/Devscripts"); 308 $ua->env_proxy; 309} 310 311sub recurs_mkdir { 312 my ($dir) = @_; 313 my @temp = split /\//, $dir; 314 my $createdir = ""; 315 foreach my $piece (@temp) { 316 if (!length $createdir and !length $piece) { 317 $createdir = "/"; 318 } elsif (length $createdir and $createdir ne "/") { 319 $createdir .= "/"; 320 } 321 $createdir .= "$piece"; 322 if (!-d $createdir) { 323 mkdir($createdir) or return 0; 324 } 325 } 326 return 1; 327} 328 329# Find the repository URL (and type) for a given package name, parsing Vcs-* 330# fields. Returns (version, type, url, origtgz_name) tuple. 331sub find_repo($$) { 332 my ($pkg, $desired_ver) = @_; 333 my @repo = ("", 0, "", ""); 334 my $found = 0; 335 my ($nonepoch_version, $version) = ("", ""); 336 my $origtgz_name = ""; 337 my $type = ""; 338 my $url = ""; 339 my @repos = (); 340 341 open(APT, "apt-cache showsrc $pkg |"); 342 while (my $line = <APT>) { 343 $found = 1; 344 chomp($line); 345 if ($line =~ /^(x-)?vcs-(\w+):\s*(.*)$/i) { 346 next if lc($2) eq "browser"; 347 ($type, $url) = (lc($2), $3); 348 } elsif ($line =~ /^Version:\s*(.*)$/i) { 349 $version = $1; 350 ($nonepoch_version = $version) =~ s/^\d+://; 351 } elsif ($line 352 =~ /^ [a-f0-9]{32} \d+ (\S+)(?:_\Q$nonepoch_version\E|\.orig)\.tar\.$compression_re$/ 353 ) { 354 $origtgz_name = $1; 355 } elsif ($line =~ /^$/) { 356 push(@repos, [$version, $type, $url, $origtgz_name]) 357 if ( $version 358 and $type 359 and $url 360 and ($desired_ver eq "" or $desired_ver eq $version)); 361 $version = ""; 362 $type = ""; 363 $url = ""; 364 $origtgz_name = ""; 365 } 366 } 367 close(APT); 368 die "unknown package '$pkg'\n" unless $found; 369 370 if (@repos) { 371 @repos = Devscripts::Versort::versort(@repos); 372 @repo = @{ $repos[0] }; 373 } 374 return @repo; 375} 376 377# Find the browse URL for a given package name, parsing Vcs-* fields. 378sub find_browse($$) { 379 my ($pkg, $desired_ver) = @_; 380 my $browse = ""; 381 my $found = 0; 382 my $version = ""; 383 my @browses; 384 385 open(APT, "apt-cache showsrc $pkg |"); 386 while (my $line = <APT>) { 387 $found = 1; 388 chomp($line); 389 if ($line =~ /^(x-)?vcs-(\w+):\s*(.*)$/i) { 390 if (lc($2) eq "browser") { 391 $browse = $3; 392 } 393 } elsif ($line =~ /^Version:\s*(.*)$/i) { 394 $version = $1; 395 } elsif ($line =~ /^$/) { 396 push(@browses, [$version, $browse]) 397 if $version 398 and $browse 399 and ($desired_ver eq "" or $desired_ver eq $version); 400 $version = ""; 401 $browse = ""; 402 } 403 } 404 close(APT); 405 die "unknown package '$pkg'\n" unless $found; 406 if (@browses) { 407 @browses = Devscripts::Versort::versort(@browses); 408 $browse = $browses[0][1]; 409 } 410 return $browse; 411} 412 413# Patch the cmdline invocation of a VCS to ensure the repository is checkout to 414# a given target directory. 415sub set_destdir($$@) { 416 my ($repo_type, $destdir, @cmd) = @_; 417 $destdir =~ s|^-d\s*||; 418 419 given ($repo_type) { 420 when ("cvs") { 421 my $module = pop @cmd; 422 push @cmd, ("-d", $destdir, $module); 423 } 424 when (/^(bzr|darcs|git|hg|svn)$/) { 425 push @cmd, $destdir; 426 } 427 default { 428 die 429"sorry, don't know how to set the destination directory for $repo_type repositories (patches welcome!)\n"; 430 } 431 } 432 return @cmd; 433} 434 435# try patching a repository URL to enable authenticated mode, *relying 436# only on user defined rules* 437sub user_set_auth($$) { 438 my ($repo_type, $url) = @_; 439 my @rules = split ' ', $config_vars{'DEBCHECKOUT_AUTH_URLS'}; 440 while (my $pat = shift @rules) { # read pairs for s/$pat/$subst/ 441 my $subst = shift @rules 442 or die 443"Configuration error for DEBCHECKOUT_AUTH_URLS: regexp and replacement texts must come in pairs. See debcheckout(1).\n"; 444 $url =~ s/$pat/qq("$subst")/ee; # ZACK: my worst Perl line ever 445 } 446 return $url; 447} 448 449# Patch a given repository URL to ensure that the checked out out repository 450# can be committed to. Only works for well known repositories (mainly Salsa's). 451sub set_auth($$$$) { 452 my ($repo_type, $url, $user, $dont_act) = @_; 453 454 my $old_url = $url; 455 456 $user .= "@" if length $user; 457 my $user_local = $user; 458 $user_local =~ s|(.*)(@)|$1|; 459 my $user_url = $url; 460 461# Adjust alioth urls from new-style anonymous access to old-style and then deal 462# with adjusting for authentication on alioth 463 $url 464 =~ s@(?:alioth\.debian\.org/(?:anonscm/bzr|scm/loggerhead/bzr)|anonscm\.debian\.org/bzr(?:/bzr)?)@bzr.debian.org/bzr@; 465 $url 466 =~ s@(?:alioth\.debian\.org/anonscm/darcs|anonscm\.debian\.org/darcs)@darcs.debian.org/darcs@; 467 $url =~ s@git://anonscm\.debian\.org@git://git.debian.org@; 468 $url 469 =~ s@(?:alioth\.debian\.org/anonscm/c?git|anonscm\.debian\.org/c?git)@git.debian.org/git@; 470 $url 471 =~ s@(?:alioth\.debian\.org/anonscm/hg|anonscm\.debian\.org/hg)@hg.debian.org/hg@; 472 $url =~ s@svn://(?:scm\.alioth|anonscm)\.debian\.org@svn://svn.debian.org@; 473 474 # other providers 475 $url =~ s!(?:git|https?)://github\.com/!git\@github.com:!; 476 477 given ($repo_type) { 478 when ("bzr") { 479 $url 480 =~ s|^[\w+]+://(bzr\.debian\.org)/(.*)|bzr+ssh://$user$1/bzr/$2|; 481 $url 482 =~ s[^\w+://(?:(bazaar|code)\.)?(launchpad\.net/.*)][bzr+ssh://${user}bazaar.$2]; 483 } 484 when ("darcs") { 485 if ($url =~ m|(~)|) { 486 $user_url =~ s|^\w+://(darcs\.debian\.org)/(~)(.*?)/.*|$3|; 487 die 488"the local user '$user_local' doesn't own the personal repository '$url'\n" 489 if $user_local ne $user_url and !$dont_act; 490 $url 491 =~ s|^\w+://(darcs\.debian\.org)/(~)(.*?)/(.*)|$user$1:~/public_darcs/$4|; 492 } else { 493 $url 494 =~ s|^\w+://(darcs\.debian\.org)/(?:darcs/)?(.*)|$user$1:/darcs/$2|; 495 } 496 } 497 when ("git") { 498 if ($url =~ s!^https://salsa.debian.org/!git\@salsa.debian.org:!) { 499 } elsif ($url =~ m%(/users/|~)%) { 500 $user_url 501 =~ s|^\w+://(git\.debian\.org)/git/users/(.*?)/.*|$2|; 502 $user_url =~ s|^\w+://(git\.debian\.org)/~(.*?)/.*|$2|; 503 504 die 505"the local user '$user_local' doesn't own the personal repository '$url'\n" 506 if $user_local ne $user_url and !$dont_act; 507 $url 508 =~ s|^\w+://(git\.debian\.org)/git/users/.*?/(.*)|git+ssh://$user$1/~/public_git/$2|; 509 $url 510 =~ s|^\w+://(git\.debian\.org)/~.*?/(.*)|git+ssh://$user$1/~/public_git/$2|; 511 } else { 512 $url 513 =~ s|^\w+://(git\.debian\.org)/(?:git/)?(.*)|git+ssh://$user$1/git/$2|; 514 } 515 $url 516 =~ s[^\w+://(?:(git|code)\.)?(launchpad\.net/.*)][git+ssh://${user}git.$2]; 517 } 518 # "hg ssh://" needs an extra slash so paths are not based in the user's $HOME 519 when ("hg") { 520 $url =~ s|^\w+://(hg\.debian\.org/)|ssh://$user$1/|; 521 } 522 when ("svn") { 523 $url =~ s|^\w+://(svn\.debian\.org)/(.*)|svn+ssh://$user$1/svn/$2|; 524 } 525 default { 526 die 527"sorry, don't know how to enable authentication for $repo_type repositories (patches welcome!)\n"; 528 } 529 } 530 if ($url eq $old_url) { # last attempt: try with user-defined rules 531 $url = user_set_auth($repo_type, $url); 532 } 533 die 534"can't use authenticated mode on repository '$url' since it is not a known repository (e.g. salsa.debian.org)\n" 535 if $url eq $old_url; 536 return $url; 537} 538 539# Hack around specific, known deficiencies in repositories that don't follow 540# standard behavior. 541sub munge_url($$) { 542 my ($repo_type, $repo_url) = @_; 543 544 given ($repo_type) { 545 when ('bzr') { 546 # bzr.d.o explicitly doesn't run a smart server. Need to use nosmart 547 $repo_url 548 =~ s|^http://(bzr\.debian\.org)/(.*)|nosmart+http://$1/$2|; 549 } 550 } 551 return $repo_url; 552} 553 554# returns an error code after system(). If system() exited normally, this is the 555# error code of the child process. If it exited with a signal (if a user hit 556# C-c, say) then this returns something <0. In either case, errorcode()==0 means 557# "success" 558sub errorcode { 559 my $code = $? >> 8; 560 if ($code == 0 && $? != 0) { 561 return -$?; 562 } 563 return $code; 564} 565 566# Checkout a given repository in a given destination directory. 567sub checkout_repo($$$$) { 568 my ($repo_type, $repo_url, $destdir, $anon_repo_url) = @_; 569 my (@cmd, @extracmd); 570 571 given ($repo_type) { 572 when ("arch") { @cmd = ("tla", "grab", $repo_url); } # XXX ??? 573 when ("bzr") { @cmd = ("bzr", "branch", $repo_url); } 574 when ("cvs") { 575 $repo_url =~ s|^-d\s*||; 576 my ($root, $module) = split /\s+/, $repo_url; 577 $module ||= ''; 578 @cmd = ("cvs", "-d", $root, "checkout", $module); 579 } 580 when ("darcs") { @cmd = ("darcs", "get", $repo_url); } 581 when ("git") { 582 my $push_url; 583 584 if (defined $anon_repo_url and length $anon_repo_url) { 585 if ($repo_url =~ m|(.*)\s+-b\s+(.*)|) { 586 $push_url = $1; 587 } else { 588 $push_url = $repo_url; 589 } 590 591 $repo_url = $anon_repo_url; 592 } 593 594 if ($repo_url =~ m|(.*)\s+-b\s+(.*)|) { 595 @cmd = ("git", "clone", $1, "-b", $2); 596 } else { 597 @cmd = ("git", "clone", $repo_url); 598 } 599 600 if ($push_url) { 601 @extracmd = ('git', 'remote', 'set-url', '--push', 'origin', 602 $push_url); 603 } 604 } 605 when ("hg") { @cmd = ("hg", "clone", $repo_url); } 606 when ("svn") { @cmd = ("svn", "co", $repo_url); } 607 default { die "unsupported version control system '$repo_type'.\n"; } 608 } 609 @cmd = set_destdir($repo_type, $destdir, @cmd) if length $destdir; 610 print "@cmd ...\n"; 611 system @cmd; 612 my $rc = errorcode(); 613 614 if ($rc == 0 && @extracmd) { 615 my $oldcwd = getcwd(); 616 my $clonedir; 617 618 print "@extracmd ...\n"; 619 620 if (length $destdir) { 621 $clonedir = $destdir; 622 } else { 623 ($clonedir = $repo_url) =~ s|.*/(.*)(.git)?|$1|; 624 } 625 626 chdir $clonedir; 627 system @extracmd; 628 $rc = errorcode(); 629 chdir($oldcwd); 630 } 631 632 return $rc; 633} 634 635# Checkout a given set of files from a given repository in a given 636# destination directory. 637sub checkout_files($$$$) { 638 my ($repo_type, $repo_url, $destdir, $browse_url) = @_; 639 my @cmd; 640 my $tempdir; 641 642 foreach my $file (@files) { 643 my $fetched = 0; 644 645 # Cheap'n'dirty escaping 646 # We should possibly depend on URI::Escape, but this should do... 647 my $escaped_file = $file; 648 $escaped_file =~ s|\+|%2B|g; 649 650 my $dir; 651 if (defined $destdir and length $destdir) { 652 $dir = "$destdir/"; 653 } else { 654 $dir = "./"; 655 } 656 $dir .= dirname($file); 657 658 if (!recurs_mkdir($dir)) { 659 print STDERR "Failed to create directory $dir\n"; 660 return 1; 661 } 662 663 given ($repo_type) { 664 when ("arch") { 665 # If we've already retrieved a copy of the repository, 666 # reuse it 667 if (!length($tempdir)) { 668 if ( 669 !( 670 $tempdir = tempdir( 671 "debcheckoutXXXX", 672 TMPDIR => 1, 673 CLEANUP => 1 674 )) 675 ) { 676 print STDERR 677 "Failed to create temporary directory . $!\n"; 678 return 1; 679 } 680 681 my $oldcwd = getcwd(); 682 chdir $tempdir; 683 @cmd = ("tla", "grab", $repo_url); 684 print "@cmd ...\n"; 685 my $rc = system(@cmd); 686 chdir $oldcwd; 687 return ($rc >> 8) if $rc != 0; 688 } 689 690 if (!copy("$tempdir/$file", $dir)) { 691 print STDERR "Failed to copy $file to $dir: $!\n"; 692 return 1; 693 } 694 } 695 when ("cvs") { 696 if (!length($tempdir)) { 697 if ( 698 !( 699 $tempdir = tempdir( 700 "debcheckoutXXXX", 701 TMPDIR => 1, 702 CLEANUP => 1 703 )) 704 ) { 705 print STDERR 706 "Failed to create temporary directory . $!\n"; 707 return 1; 708 } 709 } 710 $repo_url =~ s|^-d\s*||; 711 my ($root, $module) = split /\s+/, $repo_url; 712 # If an explicit module name isn't present, use the last 713 # component of the URL 714 if (!length($module)) { 715 $module = $repo_url; 716 $module =~ s%^.*/(.*?)$%$1%; 717 } 718 $module .= "/$file"; 719 $module =~ s%//%/%g; 720 721 my $oldcwd = getcwd(); 722 chdir $tempdir; 723 @cmd = ("cvs", "-d", $root, "export", "-r", "HEAD", "-f", 724 $module); 725 print "\n@cmd ...\n"; 726 system @cmd; 727 if (errorcode() != 0) { 728 chdir $oldcwd; 729 return (errorcode()); 730 } else { 731 chdir $oldcwd; 732 if (copy("$tempdir/$module", $dir)) { 733 print "Copied to $destdir/$file\n"; 734 } else { 735 print STDERR "Failed to copy $file to $dir: $!\n"; 736 return 1; 737 } 738 } 739 } 740 when (/(svn|bzr)/) { 741 @cmd = ($repo_type, "cat", "$repo_url/$file"); 742 print "@cmd > $dir/" . basename($file) . " ... \n"; 743 if (!open CAT, '-|', @cmd) { 744 print STDERR "Failed to execute @cmd $!\n"; 745 return 1; 746 } 747 local $/; 748 my $content = <CAT>; 749 close CAT; 750 if (!open OUTPUT, ">", $dir . "/" . basename($file)) { 751 print STDERR "Failed to create output file " 752 . basename($file) . " $!\n"; 753 return 1; 754 } 755 print OUTPUT $content; 756 close OUTPUT; 757 } 758 when (/(darcs|hg)/) { 759 # Subtly different but close enough 760 if (have_lwp) { 761 print "Attempting to retrieve $file via HTTP ...\n"; 762 763 my $file_url 764 = $repo_type eq "darcs" 765 ? "$repo_url/$escaped_file" 766 : "$repo_url/raw-file/tip/$file"; 767 init_agent() unless $ua; 768 my $request = HTTP::Request->new('GET', "$file_url"); 769 my $response = $ua->request($request); 770 if ($response->is_success) { 771 if (!open OUTPUT, ">", $dir . "/" . basename($file)) { 772 print STDERR "Failed to create output file " 773 . basename($file) . " $!\n"; 774 return 1; 775 } 776 print "Writing to $dir/" . basename($file) . " ... \n"; 777 print OUTPUT $response->content; 778 close OUTPUT; 779 $fetched = 1; 780 } 781 } 782 if ($fetched == 0) { 783 # If we've already retrieved a copy of the repository, 784 # reuse it 785 if (!length($tempdir)) { 786 if ( 787 !( 788 $tempdir = tempdir( 789 "debcheckoutXXXX", 790 TMPDIR => 1, 791 CLEANUP => 1 792 )) 793 ) { 794 print STDERR 795 "Failed to create temporary directory . $!\n"; 796 return 1; 797 } 798 799 # Can't get / clone in to a directory that already exists... 800 $tempdir .= "/repo"; 801 if ($repo_type eq "darcs") { 802 @cmd = ("darcs", "get", $repo_url, $tempdir); 803 } else { 804 @cmd = ("hg", "clone", $repo_url, $tempdir); 805 } 806 print "@cmd ...\n"; 807 my $rc = system(@cmd); 808 return ($rc >> 8) if $rc != 0; 809 print "\n"; 810 } 811 } 812 if (copy "$tempdir/$file", $dir) { 813 print "Copied $file to $dir\n"; 814 } else { 815 print STDERR "Failed to copy $file to $dir: $!\n"; 816 return 1; 817 } 818 } 819 when ("git") { 820 # If there isn't a browse URL (either because the package 821 # doesn't ship one, or because we were called with a URL, 822 # try a common pattern for gitweb 823 if (!length($browse_url)) { 824 if ($repo_url =~ m%^\w+://([^/]+)/(?:git/)?(.*)$%) { 825 $browse_url = "http://$1/?p=$2"; 826 } 827 } 828 if (have_lwp and $browse_url =~ /^http/) { 829 $escaped_file =~ s|/|%2F|g; 830 831 print "Attempting to retrieve $file via HTTP ...\n"; 832 833 init_agent() unless $ua; 834 my $file_url = "$browse_url;a=blob_plain"; 835 $file_url .= ";f=$escaped_file;hb=HEAD"; 836 my $request = HTTP::Request->new('GET', $file_url); 837 my $response = $ua->request($request); 838 my $error = 0; 839 if (!$response->is_success) { 840 if ($browse_url =~ /\.git$/) { 841 print "Error retrieving file: " 842 . $response->status_line . "\n"; 843 $error = 1; 844 } else { 845 $browse_url .= ".git"; 846 $file_url = "$browse_url;a=blob_plain"; 847 $file_url .= ";f=$escaped_file;hb=HEAD"; 848 $request = HTTP::Request->new('GET', $file_url); 849 $response = $ua->request($request); 850 if (!$response->is_success) { 851 print "Error retrieving file: " 852 . $response->status_line . "\n"; 853 $error = 1; 854 } 855 } 856 } 857 if (!$error) { 858 if (!open OUTPUT, ">", $dir . "/" . basename($file)) { 859 print STDERR "Failed to create output file " 860 . basename($file) . " $!\n"; 861 return 1; 862 } 863 print "Writing to $dir/" . basename($file) . " ... \n"; 864 print OUTPUT $response->content; 865 close OUTPUT; 866 $fetched = 1; 867 } 868 } 869 if ($fetched == 0) { 870 # If we've already retrieved a copy of the repository, 871 # reuse it 872 if (!length($tempdir)) { 873 if ( 874 !( 875 $tempdir = tempdir( 876 "debcheckoutXXXX", 877 TMPDIR => 1, 878 CLEANUP => 1 879 )) 880 ) { 881 print STDERR 882 "Failed to create temporary directory . $!\n"; 883 return 1; 884 } 885 # Since git won't clone in to a directory that 886 # already exists... 887 $tempdir .= "/repo"; 888 # Can't shallow clone from an http:: URL 889 $repo_url =~ s/^http/git/; 890 @cmd = ( 891 "git", "clone", "--depth", "1", $repo_url, 892 "$tempdir" 893 ); 894 print "@cmd ...\n\n"; 895 my $rc = system(@cmd); 896 return ($rc >> 8) if $rc != 0; 897 print "\n"; 898 } 899 900 my $oldcwd = getcwd(); 901 chdir $tempdir; 902 903 @cmd = ($repo_type, "show", "HEAD:$file"); 904 print "@cmd ... > $dir/" . basename($file) . "\n"; 905 if (!open CAT, '-|', @cmd) { 906 print STDERR "Failed to execute @cmd $!\n"; 907 chdir $oldcwd; 908 return 1; 909 } 910 chdir $oldcwd; 911 local $/; 912 my $content = <CAT>; 913 close CAT; 914 if (!open OUTPUT, ">", $dir . "/" . basename($file)) { 915 print STDERR "Failed to create output file " 916 . basename($file) . " $!\n"; 917 return 1; 918 } 919 print OUTPUT $content; 920 close OUTPUT; 921 } 922 } 923 default { 924 die "unsupported version control system '$repo_type'.\n"; 925 } 926 } 927 } 928 929 # If we've got this far, all the files were retrieved successfully 930 return 0; 931} 932 933# download source package, unpack it, and merge its contents into the checkout 934sub unpack_source($$$$$) { 935 my ($pkg, $version, $destdir, $origtgz_name, $unpack_source) = @_; 936 937 return 1 if ($unpack_source eq 'never'); 938 return 1 939 if (defined $origtgz_name and $origtgz_name eq '') 940 ; # only really relevant with URL on command line 941 942 $destdir ||= $pkg; 943 # Apt will auto-resolve binary package names to source package names. We 944 # need to know the source package name to correctly identify the source 945 # package artifacts (dsc, orig.tar.*, etc) 946 (my $srcpkg = $origtgz_name) =~ s/_.*//; 947 # is this a debian-dir-only repository? 948 unless (-d $destdir) { 949 print STDERR 950"debcheckout did not create the $destdir directory - this is probably a bug\n"; 951 return 0; 952 } 953 my @repo_files = glob "$destdir/*"; 954 my $debian_only = 0; 955 if (@repo_files == 1 and $repo_files[0] eq "$destdir/debian") { 956 $debian_only = 1; 957 } 958 959 return 1 if ($unpack_source eq 'auto' and not $debian_only); 960 if ($unpack_source ne 'download-only' and $debian_only) { 961 print 962"repository only contains the debian directory, using apt-get source\n"; 963 } 964 965 my $tmpdir = File::Temp->newdir(DIR => "."); 966 967 # unpack 968 my $oldcwd = getcwd(); 969 chdir $tmpdir; 970 my @args = ('source'); 971 push @args, '--download-only' 972 if ($unpack_source eq 'download-only' or not $debian_only); 973 push @args, $version ? "$srcpkg=$version" : $srcpkg; 974 system('apt-get', @args); 975 chdir $oldcwd; 976 977 if (errorcode()) { 978 print STDERR "apt-get source failed\n"; 979 return 0; 980 } 981 982 # put source package in place 983 foreach my $sourcefile (glob "$tmpdir/${srcpkg}_*") { 984 next unless (-f $sourcefile); # skip directories 985 my $base = $sourcefile; 986 $base =~ s!.*/!!; 987 rename $sourcefile, $base or die "rename $sourcefile $base: $!"; 988 } 989 990 return 1 if ($unpack_source eq 'download-only' or not $debian_only); 991 992 # figure out which directory was created 993 my @dirs = glob "$tmpdir/$srcpkg-*/"; 994 unless (@dirs) { 995 print STDERR 996 "apt-get source did not create any $tmpdir/$srcpkg-* directory\n"; 997 return 0; 998 } 999 my $directory = $dirs[0]; 1000 chop $directory; 1001 1002 # move all files over, except the debian directory 1003 opendir DIR, $directory or die "opendir $directory: $!"; 1004 foreach my $file (readdir DIR) { 1005 if ($file eq 'debian') { 1006 system('rm', '-rf', "$directory/$file"); 1007 } elsif ($file eq '.' or $file eq '..') { 1008 next; 1009 } else { 1010 rename "$directory/$file", "$destdir/$file" 1011 or die "rename $directory/$file $destdir/$file: $!"; 1012 } 1013 } 1014 closedir DIR; 1015 rmdir $directory or die "rmdir $directory: $!"; 1016 1017 # $tmpdir is automatically removed 1018 return 1; 1019} 1020 1021# Print information about a repository and quit. 1022sub print_repo($$) { 1023 my ($repo_type, $repo_url) = @_; 1024 1025 print "$repo_type\t$repo_url\n"; 1026 exit(0); 1027} 1028 1029sub git_ls_remote($$) { 1030 my ($url, $prefix) = @_; 1031 1032 $url =~ s|\s+-b\s+.*||; 1033 my $cmd = "git ls-remote '$url'"; 1034 $cmd .= " '$prefix/*'" if length $prefix; 1035 open GIT, "$cmd |" or die "can't execute $cmd\n"; 1036 my @refs; 1037 while (my $line = <GIT>) { 1038 chomp $line; 1039 my ($sha1, $name) = split /\s+/, $line; 1040 my $ref = $name; 1041 $ref = substr($ref, length($prefix) + 1) if length $prefix; 1042 push @refs, $ref; 1043 } 1044 close GIT; 1045 return @refs; 1046} 1047 1048# Given a GIT repository URL, extract its topgit info (if any), see 1049# the "topgit" package for more information 1050sub tg_info($) { 1051 my ($url) = @_; 1052 1053 my %info; 1054 $info{'topgit'} = 'no'; 1055 $info{'top-bases'} = ''; 1056 my @bases = git_ls_remote($url, 'refs/top-bases'); 1057 if (@bases) { 1058 $info{'topgit'} = 'yes'; 1059 $info{'top-bases'} = join ' ', @bases; 1060 } 1061 return (\%info); 1062} 1063 1064# Print details about a repository and quit. 1065sub print_details($$) { 1066 my ($repo_type, $repo_url) = @_; 1067 1068 print "type\t$repo_type\n"; 1069 print "url\t$repo_url\n"; 1070 if ($repo_type eq "git") { 1071 my $tg_info = tg_info($repo_url); 1072 while (my ($k, $v) = each %$tg_info) { 1073 print "$k\t$v\n"; 1074 } 1075 } 1076 exit(0); 1077} 1078 1079sub guess_repo_type($$) { 1080 my ($repo_url, $default) = @_; 1081 my $repo_type = $default; 1082 if ($repo_url =~ /^(git|svn|bzr)(\+ssh)?:/) { 1083 $repo_type = $1; 1084 } elsif ($repo_url =~ /^https?:\/\/(svn|git|hg|bzr|darcs)\.debian\.org/) { 1085 $repo_type = $1; 1086 } elsif ( 1087 $repo_url =~ m@^https?://anonscm.debian.org/(svn|c?git|hg|bzr|darcs)/@) 1088 { 1089 $repo_type = $1; 1090 $repo_type =~ s/cgit/git/; 1091 } 1092 return $repo_type; 1093} 1094 1095# Does a given string match the lexical rules for package names? 1096sub is_package($) { 1097 my ($arg) = @_; 1098 1099 return ($arg =~ /^[a-z0-9.+-]+$/); # lexical rule for package names 1100} 1101 1102sub main() { 1103 my $auth = 0; # authenticated mode 1104 my $destdir = ""; # destination directory 1105 my $pkg = ""; # package name 1106 my $version = ""; # package version 1107 my $origtgz_name 1108 = undef; # orig.tar.gz name (or "" when none; undef means unknown) 1109 my $print_mode = 0; # print only mode 1110 my $details_mode = 0; # details only mode 1111 my $use_package = ''; # use this package instead of guessing from the URL 1112 my $repo_type = "git"; # default repo typo, overridden by '-t' 1113 my $repo_url = ""; # repository URL 1114 my $anon_repo_url; # repository URL (before auth mangling) 1115 my $user = ""; # login name (authenticated mode only) 1116 my $browse_url = ""; # online browsable repository URL 1117 my $git_track = ""; # list of remote GIT branches to --track 1118 my $unpack_source 1119 = $config_vars{DEBCHECKOUT_SOURCE}; # retrieve and unpack orig.tar.gz 1120 GetOptions( 1121 "auth|a" => \$auth, 1122 "help|h" => sub { pod2usage({ -exitval => 0, -verbose => 1 }); }, 1123 "print|p" => \$print_mode, 1124 "details|d" => \$details_mode, 1125 "package|P=s" => \$use_package, 1126 "type|t=s" => \$repo_type, 1127 "user|u=s" => \$user, 1128 "file|f=s" => sub { push(@files, $_[1]); }, 1129 "git-track=s" => \$git_track, 1130 "source=s" => \$unpack_source, 1131 ) or pod2usage({ -exitval => 3 }); 1132 pod2usage({ -exitval => 3 }) if ($#ARGV < 0 or $#ARGV > 1); 1133 pod2usage({ 1134 -exitval => 3, 1135 -message => "-d and -p are mutually exclusive.\n", 1136 }) if ($print_mode and $details_mode); 1137 my $dont_act = 1 if ($print_mode or $details_mode); 1138 pod2usage({ 1139 -exitval => 3, 1140 -message => 1141"--source argument must be one of never, auto, download-only, and always\n", 1142 }) unless ($unpack_source =~ /^(never|auto|download-only|always)$/); 1143 1144 # -u|--user implies -a|--auth 1145 $auth = 1 if length $user; 1146 1147 # set user from the config file to be used with -a|--auth without -u|--user 1148 $user = $config_vars{DEBCHECKOUT_USER} unless $user; 1149 1150 $destdir = $ARGV[1] if $#ARGV > 0; 1151 ($pkg, $version) = split(/=/, $ARGV[0]); 1152 $version ||= ""; 1153 1154 if (not is_package($pkg)) { # repo-url passed on the command line 1155 $repo_url = $ARGV[0]; 1156 $repo_type = guess_repo_type($repo_url, $repo_type); 1157 $pkg = ""; 1158 $version = ""; 1159 # when --package is given, use it 1160 if ($use_package) { 1161 $pkg = $use_package; 1162 # else guess package from url 1163 } elsif ($repo_url =~ m!/trunk/([a-z0-9.+-]+)!) 1164 { # svn with {trunk,tags,branches}/$pkg 1165 $pkg = $1; 1166 } elsif ($repo_url =~ m!([a-z0-9.+-]+)/trunk/?!) 1167 { # svn with $pkg/{trunk,tags,branches} 1168 $pkg = $1; 1169 } elsif ($repo_url =~ /([a-z0-9.+-]+)\.git(\s+-b\s+.*)?$/) { # git 1170 $pkg = $1; 1171 } elsif ($repo_url =~ /([a-z0-9.+-]+)$/) { # catch-all 1172 $pkg = $1; 1173 } 1174 $origtgz_name = $pkg 1175 ; # FIXME: this should rather set srcpkg in unpack_source() directly 1176 } else { # package name passed on the command line 1177 ($version, $repo_type, $repo_url, $origtgz_name) 1178 = find_repo($pkg, $version); 1179 unless ($repo_type) { 1180 my $vermsg = ""; 1181 $vermsg = ", version $version" if length $version; 1182 print <<EOF; 1183No repository found for package $pkg$vermsg. 1184 1185A Vcs-* field is missing in its source record. See Debian Developer's 1186Reference 6.2.5: 1187 `https://www.debian.org/doc/developers-reference/best-pkging-practices.html#bpp-vcs' 1188If you know that the package is maintained via a version control 1189system consider asking the maintainer to expose such information. 1190 1191Nevertheless, you can get the sources of package $pkg 1192from the Debian archive executing: 1193 1194 apt-get source $pkg 1195 1196Note however that what you obtain will *not* be a local copy of 1197some version control system: your changes will not be preserved 1198and it will not be possible to commit them directly. 1199 1200EOF 1201 exit(1); 1202 } 1203 $browse_url = find_browse($pkg, $version) if @files; 1204 } 1205 1206 $repo_url = munge_url($repo_type, $repo_url); 1207 if ($auth and not @files) { 1208 $anon_repo_url = $repo_url; 1209 $repo_url = set_auth($repo_type, $repo_url, $user, $dont_act); 1210 } 1211 print_repo($repo_type, $repo_url) if $print_mode; # ... then quit 1212 print_details($repo_type, $repo_url) if $details_mode; # ... then quit 1213 if (length $pkg) { 1214 print "declared $repo_type repository at $repo_url\n"; 1215 $destdir = $pkg unless length $destdir; 1216 } 1217 my $rc; 1218 if (@files) { 1219 $rc = checkout_files($repo_type, $repo_url, $destdir, $browse_url); 1220 } else { 1221 $rc = checkout_repo($repo_type, $repo_url, $destdir, $anon_repo_url); 1222 } # XXX: there is no way to know for sure what is the destdir :-( 1223 die "checkout failed (the command above returned a non-zero exit code)\n" 1224 if $rc != 0; 1225 1226 # post-checkout actions 1227 if ($repo_type eq 'bzr' and $auth) { 1228 if (open B, '>>', "$destdir/.bzr/branch/branch.conf") { 1229 print B "\npush_location = $repo_url"; 1230 close B; 1231 } else { 1232 print STDERR 1233 "failed to open branch.conf to add push_location: $!\n"; 1234 } 1235 } elsif ($repo_type eq 'git') { 1236 my $tg_info = tg_info($repo_url); 1237 my $wcdir = $destdir; 1238 # HACK: if $destdir is unknown, take last URL part and remove /.git$/ 1239 $wcdir = (split m|\.|, (split m|/|, $repo_url)[-1])[0] 1240 unless length $wcdir; 1241 if ($$tg_info{'topgit'} eq 'yes') { 1242 print "TopGit detected, populating top-bases ...\n"; 1243 system("cd $wcdir && tg remote --populate origin"); 1244 $rc = errorcode(); 1245 print STDERR "TopGit population failed\n" if $rc != 0; 1246 } 1247 system("cd $wcdir && git config user.name \"$ENV{'DEBFULLNAME'}\"") 1248 if (defined($ENV{'DEBFULLNAME'})); 1249 system("cd $wcdir && git config user.email \"$ENV{'DEBEMAIL'}\"") 1250 if (defined($ENV{'DEBEMAIL'})); 1251 if (length $git_track) { 1252 my @heads; 1253 if ($git_track eq '*') { 1254 @heads = git_ls_remote($repo_url, 'refs/heads'); 1255 } else { 1256 @heads = split ' ', $git_track; 1257 } 1258 # Filter out any branches already populated via TopGit 1259 my @tgheads = split ' ', $$tg_info{'top-bases'}; 1260 my $master = 'master'; 1261 if ( 1262 open(HEAD, 1263 "env GIT_DIR=\"$wcdir/.git\" git symbolic-ref HEAD |" 1264 ) 1265 ) { 1266 $master = <HEAD>; 1267 chomp $master; 1268 $master =~ s@refs/heads/@@; 1269 } 1270 close(HEAD); 1271 foreach my $head (@heads) { 1272 next if $head eq $master; 1273 next if grep { $head eq $_ } @tgheads; 1274 my $cmd = "cd $wcdir"; 1275 $cmd .= " && git branch --track $head remotes/origin/$head"; 1276 system($cmd); 1277 } 1278 } 1279 } elsif ($repo_type eq 'hg') { 1280 my $username = ''; 1281 $username .= " $ENV{'DEBFULLNAME'}" if (defined($ENV{'DEBFULLNAME'})); 1282 $username .= " <$ENV{'DEBEMAIL'}>" if (defined($ENV{'DEBEMAIL'})); 1283 if ($username) { 1284 if (open(HGRC, '>>', "$destdir/.hg/hgrc")) { 1285 print HGRC "[ui]\nusername =$username\n"; 1286 close HGRC; 1287 } else { 1288 print STDERR "failed to open hgrc to set username: $!\n"; 1289 } 1290 } 1291 } 1292 die "post-checkout action failed\n" 1293 if $rc != 0; 1294 1295 if ($unpack_source) { 1296 unless ($pkg) { 1297 print STDERR 1298 "could not determine package name for orig.tar.gz retrieval\n"; 1299 $rc ||= 1; 1300 exit($rc); 1301 } 1302 unpack_source($pkg, $version, $destdir, $origtgz_name, $unpack_source) 1303 or $rc = 1; 1304 } 1305 1306 exit($rc); 1307} 1308 1309main(); 1310 1311# vim:sw=4 1312