1#!/usr/bin/perl
2#
3# debcheckout: checkout the development repository of a Debian package
4# Copyright (C) 2007-2009  Stefano Zacchiroli <zack@debian.org>
5# Copyright (C) 2010  Christoph Berg <myon@debian.org>
6#
7# This program is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <https://www.gnu.org/licenses/>.
19#
20
21# Created: Tue, 14 Aug 2007 10:20:55 +0200
22# Last-Modified: $Date$
23
24=head1 NAME
25
26debcheckout - checkout the development repository of a Debian package
27
28=head1 SYNOPSIS
29
30=over
31
32=item B<debcheckout> [I<OPTIONS>] I<PACKAGE> [I<DESTDIR>]
33
34=item B<debcheckout> [I<OPTIONS>] I<REPOSITORY_URL> [I<DESTDIR>]
35
36=item B<debcheckout> B<--help>
37
38=back
39
40=head1 DESCRIPTION
41
42B<debcheckout> retrieves the information about the Version Control System used
43to maintain a given Debian package (the I<PACKAGE> argument), and then checks
44out the latest (potentially unreleased) version of the package from its
45repository.  By default the repository is checked out to the I<PACKAGE>
46directory; this can be overridden by providing the I<DESTDIR> argument.
47
48The information about where the repository is available is expected to be found
49in B<Vcs-*> fields available in the source package record. For example, the B<vim>
50package exposes such information with a field like S<B<Vcs-Hg:
51http://hg.debian.org/hg/pkg-vim/vim>>, you can see it by grepping through
52B<apt-cache showsrc vim>.
53
54If more than one source package record containing B<Vcs-*> fields is available,
55B<debcheckout> will select the record with the highest version number.
56Alternatively, a particular version may be selected from those available by
57specifying the package name as I<PACKAGE>=I<VERSION>.
58
59If you already know the URL of a given repository you can invoke
60B<debcheckout> directly on it, but you will probably need to pass the
61appropriate B<-t> flag. That is, some heuristics are in use to guess
62the repository type from the URL; if they fail, you might want to
63override the guessed type using B<-t>.
64
65The currently supported version control systems are: Arch (arch), Bazaar (bzr), CVS (cvs),
66Darcs (darcs), Git (git), Mercurial (hg) and Subversion (svn).
67
68=head1 OPTIONS
69
70B<GENERAL OPTIONS>
71
72=over
73
74=item B<-a>, B<--auth>
75
76Work in authenticated mode; this means that for known repositories (mainly those
77hosted on S<I<https://salsa.debian.org>>) URL rewriting is attempted before
78checking out, to ensure that the repository can be committed to. For example,
79for Git repositories hosted on Salsa this means that
80S<I<git@salsa.debian.org:...git>> will be used instead of
81S<I<https://salsa.debian.org/...git>>.
82
83There are built-in rules for salsa.debian.org, alioth.debian.org and github.com. Other hosts
84can be configured using B<DEBCHECKOUT_AUTH_URLS>.
85
86=item B<-d>, B<--details>
87
88Only print a list of detailed information about the package
89repository, without checking it out; the output format is a list of
90fields, each field being a pair of TAB-separated field name and field
91value. The actual fields depend on the repository type. This action
92might require a network connection to the remote repository.
93
94Also see B<-p>. This option and B<-p> are mutually exclusive.
95
96=item B<-h>, B<--help>
97
98Print a detailed help message and exit.
99
100=item B<-p>, B<--print>
101
102Only print a summary about package repository information, without
103checking it out; the output format is TAB-separated with two fields:
104repository type, repository URL. This action works offline, it only
105uses "static" information as known by APT's cache.
106
107Also see B<-d>. This option and B<-d> are mutually exclusive.
108
109=item B<-P> I<package>, B<--package> I<package>
110
111When checking out a repository URL, instead of trying to guess the package name
112from the URL, use this package name.
113
114=item B<-t> I<TYPE>, B<--type> I<TYPE>
115
116Override the repository type (which defaults to some heuristics based
117on the URL or, in case of heuristic failure, the fallback "git");
118should be one of the currently supported repository types.
119
120=item B<-u> I<USERNAME>, B<--user> I<USERNAME>
121
122Specify the login name to be used in authenticated mode (see B<-a>). This option
123implies B<-a>: you don't need to specify both.
124
125=item B<-f> I<FILE>, B<--file> I<FILE>
126
127Specify that the named file should be extracted from the repository and placed
128in the destination directory. May be used more than once to extract multiple
129files.
130
131=item B<--source=never>|B<auto>|B<download-only>|B<always>
132
133Some packages only place the F<debian> directory in version control.
134B<debcheckout> can retrieve the remaining parts of the source using B<apt-get
135source> and move the files into the checkout.
136
137=over
138
139=item B<never>
140
141Only use the repository.
142
143=item B<auto> (default)
144
145If the repository only contains the F<debian> directory, retrieve the source
146package, unpack it, and also place the F<.orig.tar.gz> file into the current
147directory. Else, do nothing.
148
149=item B<download-only>
150
151Always retrieve the I<.orig.tar.gz> file, but do not unpack it.
152
153=item B<always>
154
155Always retrieve the I<.orig.tar.gz> file, and if the repository only contains the
156F<debian> directory, unpack it.
157
158=back
159
160=back
161
162B<VCS-SPECIFIC OPTIONS>
163
164I<GIT-SPECIFIC OPTIONS>
165
166=over
167
168=item B<--git-track> I<BRANCHES>
169
170Specify a list of remote branches which will be set up for tracking
171(as in S<B<git branch --track>>, see B<git-branch>(1)) after the remote
172Git repository has been cloned. The list should be given as a
173space-separated list of branch names.
174
175As a shorthand, the string "B<*>" can be given to require tracking of all
176remote branches.
177
178=back
179
180=head1 CONFIGURATION VARIABLES
181
182The two configuration files F</etc/devscripts.conf> and
183F<~/.devscripts> are sourced by a shell in that order to set
184configuration variables. Command line options can be used to override
185configuration file settings. Environment variable settings are ignored
186for this purpose. The currently recognised variables are:
187
188=over
189
190=item B<DEBCHECKOUT_AUTH_URLS>
191
192This variable should be a space separated list of Perl regular
193expressions and replacement texts, which must come in pairs: I<REGEXP>
194I<TEXT> I<REGEXP> I<TEXT> ... and so on. Each pair denotes a substitution which
195is applied to repository URLs if other built-in means of building URLs
196for authenticated mode (see B<-a>) have failed.
197
198References to matching substrings in the replacement texts are
199allowed as usual in Perl by the means of B<$1>, B<$2>, ... and so on.
200
201This setting is used to configure the "authenticated mode" location for
202repositories. The Debian repositories on S<salsa.debian.org> are implicitly
203defined, as is S<github.com>.
204
205Here is a sample snippet suitable for the configuration files:
206
207 DEBCHECKOUT_AUTH_URLS='
208  ^\w+://(svn\.example\.com)/(.*)    svn+ssh://$1/srv/svn/$2
209  ^\w+://(git\.example\.com)/(.*)    git+ssh://$1/home/git/$2
210 '
211
212Note that whitespace is not allowed in either regexps or
213replacement texts. Also, given that configuration files are sourced by
214a shell, you probably want to use single quotes around the value of
215this variable.
216
217=item B<DEBCHECKOUT_SOURCE>
218
219This variable determines under what scenarios the associated orig.tar.gz for a
220package will be downloaded.  See the B<--source> option for a description of
221the values.
222
223=item B<DEBCHECKOUT_USER>
224
225This variable sets the username for authenticated mode. It can be overridden
226with the B<--user> option. Setting this variable does not imply the use of
227authenticated mode, it still has to be activated with B<--auth>.
228
229=back
230
231=head1 SEE ALSO
232
233B<apt-cache>(8), Section 6.2.5 of the Debian Developer's Reference (for
234more information about B<Vcs-*> fields): S<I<https://www.debian.org/doc/developers-reference/best-pkging-practices.html#bpp-vcs>>.
235
236=head1 AUTHOR
237
238B<debcheckout> and this manpage have been written by Stefano Zacchiroli
239<I<zack@debian.org>>.
240
241=cut
242
243use strict;
244use warnings;
245no if $] >= 5.018, 'warnings', 'experimental::smartmatch';
246use feature 'switch';
247use Getopt::Long qw(:config bundling permute no_getopt_compat);
248use Pod::Usage;
249use File::Basename;
250use File::Copy qw/copy/;
251use File::Temp qw/tempdir/;
252use Cwd;
253use Devscripts::Compression;
254use Devscripts::Versort;
255
256my @files = ();    # files to checkout
257
258my $compression_re = compression_get_file_extension_regex();
259
260# <snippet from="bts.pl">
261# <!-- TODO we really need to factor out in a Perl module the
262#      configuration file parsing code -->
263my @config_files = ('/etc/devscripts.conf', '~/.devscripts');
264my %config_vars  = (
265    'DEBCHECKOUT_AUTH_URLS' => '',
266    'DEBCHECKOUT_SOURCE'    => 'auto',
267    'DEBCHECKOUT_USER'      => '',
268);
269my %config_default = %config_vars;
270my $shell_cmd;
271# Set defaults
272foreach my $var (keys %config_vars) {
273    $shell_cmd .= qq[$var="$config_vars{$var}";\n];
274}
275$shell_cmd .= 'for file in ' . join(" ", @config_files) . "; do\n";
276$shell_cmd .= '[ -f $file ] && . $file; done;' . "\n";
277# Read back values
278foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" }
279my $shell_out = `/bin/bash -c '$shell_cmd'`;
280@config_vars{ keys %config_vars } = split /\n/, $shell_out, -1;
281# </snippet>
282
283my $lwp_broken;
284my $ua;
285
286sub have_lwp() {
287    return ($lwp_broken ? 0 : 1) if defined $lwp_broken;
288    eval {
289        require LWP;
290        require LWP::UserAgent;
291    };
292
293    if ($@) {
294        if ($@ =~ m%^Can\'t locate LWP%) {
295            $lwp_broken = "the libwww-perl package is not installed";
296        } else {
297            $lwp_broken = "couldn't load LWP::UserAgent: $@";
298        }
299    } else {
300        $lwp_broken = '';
301    }
302    return $lwp_broken ? 0 : 1;
303}
304
305sub init_agent {
306    $ua = new LWP::UserAgent;    # we create a global UserAgent object
307    $ua->agent("LWP::UserAgent/Devscripts");
308    $ua->env_proxy;
309}
310
311sub recurs_mkdir {
312    my ($dir) = @_;
313    my @temp      = split /\//, $dir;
314    my $createdir = "";
315    foreach my $piece (@temp) {
316        if (!length $createdir and !length $piece) {
317            $createdir = "/";
318        } elsif (length $createdir and $createdir ne "/") {
319            $createdir .= "/";
320        }
321        $createdir .= "$piece";
322        if (!-d $createdir) {
323            mkdir($createdir) or return 0;
324        }
325    }
326    return 1;
327}
328
329# Find the repository URL (and type) for a given package name, parsing Vcs-*
330# fields.  Returns (version, type, url, origtgz_name) tuple.
331sub find_repo($$) {
332    my ($pkg, $desired_ver) = @_;
333    my @repo  = ("", 0, "", "");
334    my $found = 0;
335    my ($nonepoch_version, $version) = ("", "");
336    my $origtgz_name = "";
337    my $type         = "";
338    my $url          = "";
339    my @repos        = ();
340
341    open(APT, "apt-cache showsrc $pkg |");
342    while (my $line = <APT>) {
343        $found = 1;
344        chomp($line);
345        if ($line =~ /^(x-)?vcs-(\w+):\s*(.*)$/i) {
346            next if lc($2) eq "browser";
347            ($type, $url) = (lc($2), $3);
348        } elsif ($line =~ /^Version:\s*(.*)$/i) {
349            $version = $1;
350            ($nonepoch_version = $version) =~ s/^\d+://;
351        } elsif ($line
352            =~ /^ [a-f0-9]{32} \d+ (\S+)(?:_\Q$nonepoch_version\E|\.orig)\.tar\.$compression_re$/
353        ) {
354            $origtgz_name = $1;
355        } elsif ($line =~ /^$/) {
356            push(@repos, [$version, $type, $url, $origtgz_name])
357              if (  $version
358                and $type
359                and $url
360                and ($desired_ver eq "" or $desired_ver eq $version));
361            $version      = "";
362            $type         = "";
363            $url          = "";
364            $origtgz_name = "";
365        }
366    }
367    close(APT);
368    die "unknown package '$pkg'\n" unless $found;
369
370    if (@repos) {
371        @repos = Devscripts::Versort::versort(@repos);
372        @repo  = @{ $repos[0] };
373    }
374    return @repo;
375}
376
377# Find the browse URL for a given package name, parsing Vcs-* fields.
378sub find_browse($$) {
379    my ($pkg, $desired_ver) = @_;
380    my $browse  = "";
381    my $found   = 0;
382    my $version = "";
383    my @browses;
384
385    open(APT, "apt-cache showsrc $pkg |");
386    while (my $line = <APT>) {
387        $found = 1;
388        chomp($line);
389        if ($line =~ /^(x-)?vcs-(\w+):\s*(.*)$/i) {
390            if (lc($2) eq "browser") {
391                $browse = $3;
392            }
393        } elsif ($line =~ /^Version:\s*(.*)$/i) {
394            $version = $1;
395        } elsif ($line =~ /^$/) {
396            push(@browses, [$version, $browse])
397              if $version
398              and $browse
399              and ($desired_ver eq "" or $desired_ver eq $version);
400            $version = "";
401            $browse  = "";
402        }
403    }
404    close(APT);
405    die "unknown package '$pkg'\n" unless $found;
406    if (@browses) {
407        @browses = Devscripts::Versort::versort(@browses);
408        $browse  = $browses[0][1];
409    }
410    return $browse;
411}
412
413# Patch the cmdline invocation of a VCS to ensure the repository is checkout to
414# a given target directory.
415sub set_destdir($$@) {
416    my ($repo_type, $destdir, @cmd) = @_;
417    $destdir =~ s|^-d\s*||;
418
419    given ($repo_type) {
420        when ("cvs") {
421            my $module = pop @cmd;
422            push @cmd, ("-d", $destdir, $module);
423        }
424        when (/^(bzr|darcs|git|hg|svn)$/) {
425            push @cmd, $destdir;
426        }
427        default {
428            die
429"sorry, don't know how to set the destination directory for $repo_type repositories (patches welcome!)\n";
430        }
431    }
432    return @cmd;
433}
434
435# try patching a repository URL to enable authenticated mode, *relying
436# only on user defined rules*
437sub user_set_auth($$) {
438    my ($repo_type, $url) = @_;
439    my @rules = split ' ', $config_vars{'DEBCHECKOUT_AUTH_URLS'};
440    while (my $pat = shift @rules) {    # read pairs for s/$pat/$subst/
441        my $subst = shift @rules
442          or die
443"Configuration error for DEBCHECKOUT_AUTH_URLS: regexp and replacement texts must come in pairs. See debcheckout(1).\n";
444        $url =~ s/$pat/qq("$subst")/ee;    # ZACK: my worst Perl line ever
445    }
446    return $url;
447}
448
449# Patch a given repository URL to ensure that the checked out out repository
450# can be committed to. Only works for well known repositories (mainly Salsa's).
451sub set_auth($$$$) {
452    my ($repo_type, $url, $user, $dont_act) = @_;
453
454    my $old_url = $url;
455
456    $user .= "@" if length $user;
457    my $user_local = $user;
458    $user_local =~ s|(.*)(@)|$1|;
459    my $user_url = $url;
460
461# Adjust alioth urls from new-style anonymous access to old-style and then deal
462# with adjusting for authentication on alioth
463    $url
464      =~ s@(?:alioth\.debian\.org/(?:anonscm/bzr|scm/loggerhead/bzr)|anonscm\.debian\.org/bzr(?:/bzr)?)@bzr.debian.org/bzr@;
465    $url
466      =~ s@(?:alioth\.debian\.org/anonscm/darcs|anonscm\.debian\.org/darcs)@darcs.debian.org/darcs@;
467    $url =~ s@git://anonscm\.debian\.org@git://git.debian.org@;
468    $url
469      =~ s@(?:alioth\.debian\.org/anonscm/c?git|anonscm\.debian\.org/c?git)@git.debian.org/git@;
470    $url
471      =~ s@(?:alioth\.debian\.org/anonscm/hg|anonscm\.debian\.org/hg)@hg.debian.org/hg@;
472    $url =~ s@svn://(?:scm\.alioth|anonscm)\.debian\.org@svn://svn.debian.org@;
473
474    # other providers
475    $url =~ s!(?:git|https?)://github\.com/!git\@github.com:!;
476
477    given ($repo_type) {
478        when ("bzr") {
479            $url
480              =~ s|^[\w+]+://(bzr\.debian\.org)/(.*)|bzr+ssh://$user$1/bzr/$2|;
481            $url
482              =~ s[^\w+://(?:(bazaar|code)\.)?(launchpad\.net/.*)][bzr+ssh://${user}bazaar.$2];
483        }
484        when ("darcs") {
485            if ($url =~ m|(~)|) {
486                $user_url =~ s|^\w+://(darcs\.debian\.org)/(~)(.*?)/.*|$3|;
487                die
488"the local user '$user_local' doesn't own the personal repository '$url'\n"
489                  if $user_local ne $user_url and !$dont_act;
490                $url
491                  =~ s|^\w+://(darcs\.debian\.org)/(~)(.*?)/(.*)|$user$1:~/public_darcs/$4|;
492            } else {
493                $url
494                  =~ s|^\w+://(darcs\.debian\.org)/(?:darcs/)?(.*)|$user$1:/darcs/$2|;
495            }
496        }
497        when ("git") {
498            if ($url =~ s!^https://salsa.debian.org/!git\@salsa.debian.org:!) {
499            } elsif ($url =~ m%(/users/|~)%) {
500                $user_url
501                  =~ s|^\w+://(git\.debian\.org)/git/users/(.*?)/.*|$2|;
502                $user_url =~ s|^\w+://(git\.debian\.org)/~(.*?)/.*|$2|;
503
504                die
505"the local user '$user_local' doesn't own the personal repository '$url'\n"
506                  if $user_local ne $user_url and !$dont_act;
507                $url
508                  =~ s|^\w+://(git\.debian\.org)/git/users/.*?/(.*)|git+ssh://$user$1/~/public_git/$2|;
509                $url
510                  =~ s|^\w+://(git\.debian\.org)/~.*?/(.*)|git+ssh://$user$1/~/public_git/$2|;
511            } else {
512                $url
513                  =~ s|^\w+://(git\.debian\.org)/(?:git/)?(.*)|git+ssh://$user$1/git/$2|;
514            }
515            $url
516              =~ s[^\w+://(?:(git|code)\.)?(launchpad\.net/.*)][git+ssh://${user}git.$2];
517        }
518  # "hg ssh://" needs an extra slash so paths are not based in the user's $HOME
519        when ("hg") {
520            $url =~ s|^\w+://(hg\.debian\.org/)|ssh://$user$1/|;
521        }
522        when ("svn") {
523            $url =~ s|^\w+://(svn\.debian\.org)/(.*)|svn+ssh://$user$1/svn/$2|;
524        }
525        default {
526            die
527"sorry, don't know how to enable authentication for $repo_type repositories (patches welcome!)\n";
528        }
529    }
530    if ($url eq $old_url) {    # last attempt: try with user-defined rules
531        $url = user_set_auth($repo_type, $url);
532    }
533    die
534"can't use authenticated mode on repository '$url' since it is not a known repository (e.g. salsa.debian.org)\n"
535      if $url eq $old_url;
536    return $url;
537}
538
539# Hack around specific, known deficiencies in repositories that don't follow
540# standard behavior.
541sub munge_url($$) {
542    my ($repo_type, $repo_url) = @_;
543
544    given ($repo_type) {
545        when ('bzr') {
546          # bzr.d.o explicitly doesn't run a smart server.  Need to use nosmart
547            $repo_url
548              =~ s|^http://(bzr\.debian\.org)/(.*)|nosmart+http://$1/$2|;
549        }
550    }
551    return $repo_url;
552}
553
554# returns an error code after system(). If system() exited normally, this is the
555# error code of the child process. If it exited with a signal (if a user hit
556# C-c, say) then this returns something <0. In either case, errorcode()==0 means
557# "success"
558sub errorcode {
559    my $code = $? >> 8;
560    if ($code == 0 && $? != 0) {
561        return -$?;
562    }
563    return $code;
564}
565
566# Checkout a given repository in a given destination directory.
567sub checkout_repo($$$$) {
568    my ($repo_type, $repo_url, $destdir, $anon_repo_url) = @_;
569    my (@cmd, @extracmd);
570
571    given ($repo_type) {
572        when ("arch") { @cmd = ("tla", "grab",   $repo_url); }    # XXX ???
573        when ("bzr")  { @cmd = ("bzr", "branch", $repo_url); }
574        when ("cvs") {
575            $repo_url =~ s|^-d\s*||;
576            my ($root, $module) = split /\s+/, $repo_url;
577            $module ||= '';
578            @cmd = ("cvs", "-d", $root, "checkout", $module);
579        }
580        when ("darcs") { @cmd = ("darcs", "get", $repo_url); }
581        when ("git") {
582            my $push_url;
583
584            if (defined $anon_repo_url and length $anon_repo_url) {
585                if ($repo_url =~ m|(.*)\s+-b\s+(.*)|) {
586                    $push_url = $1;
587                } else {
588                    $push_url = $repo_url;
589                }
590
591                $repo_url = $anon_repo_url;
592            }
593
594            if ($repo_url =~ m|(.*)\s+-b\s+(.*)|) {
595                @cmd = ("git", "clone", $1, "-b", $2);
596            } else {
597                @cmd = ("git", "clone", $repo_url);
598            }
599
600            if ($push_url) {
601                @extracmd = ('git', 'remote', 'set-url', '--push', 'origin',
602                    $push_url);
603            }
604        }
605        when ("hg")  { @cmd = ("hg", "clone", $repo_url); }
606        when ("svn") { @cmd = ("svn", "co", $repo_url); }
607        default { die "unsupported version control system '$repo_type'.\n"; }
608    }
609    @cmd = set_destdir($repo_type, $destdir, @cmd) if length $destdir;
610    print "@cmd ...\n";
611    system @cmd;
612    my $rc = errorcode();
613
614    if ($rc == 0 && @extracmd) {
615        my $oldcwd = getcwd();
616        my $clonedir;
617
618        print "@extracmd ...\n";
619
620        if (length $destdir) {
621            $clonedir = $destdir;
622        } else {
623            ($clonedir = $repo_url) =~ s|.*/(.*)(.git)?|$1|;
624        }
625
626        chdir $clonedir;
627        system @extracmd;
628        $rc = errorcode();
629        chdir($oldcwd);
630    }
631
632    return $rc;
633}
634
635# Checkout a given set of files from a given repository in a given
636# destination directory.
637sub checkout_files($$$$) {
638    my ($repo_type, $repo_url, $destdir, $browse_url) = @_;
639    my @cmd;
640    my $tempdir;
641
642    foreach my $file (@files) {
643        my $fetched = 0;
644
645        # Cheap'n'dirty escaping
646        # We should possibly depend on URI::Escape, but this should do...
647        my $escaped_file = $file;
648        $escaped_file =~ s|\+|%2B|g;
649
650        my $dir;
651        if (defined $destdir and length $destdir) {
652            $dir = "$destdir/";
653        } else {
654            $dir = "./";
655        }
656        $dir .= dirname($file);
657
658        if (!recurs_mkdir($dir)) {
659            print STDERR "Failed to create directory $dir\n";
660            return 1;
661        }
662
663        given ($repo_type) {
664            when ("arch") {
665                # If we've already retrieved a copy of the repository,
666                # reuse it
667                if (!length($tempdir)) {
668                    if (
669                        !(
670                            $tempdir = tempdir(
671                                "debcheckoutXXXX",
672                                TMPDIR  => 1,
673                                CLEANUP => 1
674                            ))
675                    ) {
676                        print STDERR
677                          "Failed to create temporary directory . $!\n";
678                        return 1;
679                    }
680
681                    my $oldcwd = getcwd();
682                    chdir $tempdir;
683                    @cmd = ("tla", "grab", $repo_url);
684                    print "@cmd ...\n";
685                    my $rc = system(@cmd);
686                    chdir $oldcwd;
687                    return ($rc >> 8) if $rc != 0;
688                }
689
690                if (!copy("$tempdir/$file", $dir)) {
691                    print STDERR "Failed to copy $file to $dir: $!\n";
692                    return 1;
693                }
694            }
695            when ("cvs") {
696                if (!length($tempdir)) {
697                    if (
698                        !(
699                            $tempdir = tempdir(
700                                "debcheckoutXXXX",
701                                TMPDIR  => 1,
702                                CLEANUP => 1
703                            ))
704                    ) {
705                        print STDERR
706                          "Failed to create temporary directory . $!\n";
707                        return 1;
708                    }
709                }
710                $repo_url =~ s|^-d\s*||;
711                my ($root, $module) = split /\s+/, $repo_url;
712                # If an explicit module name isn't present, use the last
713                # component of the URL
714                if (!length($module)) {
715                    $module = $repo_url;
716                    $module =~ s%^.*/(.*?)$%$1%;
717                }
718                $module .= "/$file";
719                $module =~ s%//%/%g;
720
721                my $oldcwd = getcwd();
722                chdir $tempdir;
723                @cmd = ("cvs", "-d", $root, "export", "-r", "HEAD", "-f",
724                    $module);
725                print "\n@cmd ...\n";
726                system @cmd;
727                if (errorcode() != 0) {
728                    chdir $oldcwd;
729                    return (errorcode());
730                } else {
731                    chdir $oldcwd;
732                    if (copy("$tempdir/$module", $dir)) {
733                        print "Copied to $destdir/$file\n";
734                    } else {
735                        print STDERR "Failed to copy $file to $dir: $!\n";
736                        return 1;
737                    }
738                }
739            }
740            when (/(svn|bzr)/) {
741                @cmd = ($repo_type, "cat", "$repo_url/$file");
742                print "@cmd > $dir/" . basename($file) . " ... \n";
743                if (!open CAT, '-|', @cmd) {
744                    print STDERR "Failed to execute @cmd $!\n";
745                    return 1;
746                }
747                local $/;
748                my $content = <CAT>;
749                close CAT;
750                if (!open OUTPUT, ">", $dir . "/" . basename($file)) {
751                    print STDERR "Failed to create output file "
752                      . basename($file) . " $!\n";
753                    return 1;
754                }
755                print OUTPUT $content;
756                close OUTPUT;
757            }
758            when (/(darcs|hg)/) {
759                # Subtly different but close enough
760                if (have_lwp) {
761                    print "Attempting to retrieve $file via HTTP ...\n";
762
763                    my $file_url
764                      = $repo_type eq "darcs"
765                      ? "$repo_url/$escaped_file"
766                      : "$repo_url/raw-file/tip/$file";
767                    init_agent() unless $ua;
768                    my $request  = HTTP::Request->new('GET', "$file_url");
769                    my $response = $ua->request($request);
770                    if ($response->is_success) {
771                        if (!open OUTPUT, ">", $dir . "/" . basename($file)) {
772                            print STDERR "Failed to create output file "
773                              . basename($file) . " $!\n";
774                            return 1;
775                        }
776                        print "Writing to $dir/" . basename($file) . " ... \n";
777                        print OUTPUT $response->content;
778                        close OUTPUT;
779                        $fetched = 1;
780                    }
781                }
782                if ($fetched == 0) {
783                    # If we've already retrieved a copy of the repository,
784                    # reuse it
785                    if (!length($tempdir)) {
786                        if (
787                            !(
788                                $tempdir = tempdir(
789                                    "debcheckoutXXXX",
790                                    TMPDIR  => 1,
791                                    CLEANUP => 1
792                                ))
793                        ) {
794                            print STDERR
795                              "Failed to create temporary directory . $!\n";
796                            return 1;
797                        }
798
799                   # Can't get / clone in to a directory that already exists...
800                        $tempdir .= "/repo";
801                        if ($repo_type eq "darcs") {
802                            @cmd = ("darcs", "get", $repo_url, $tempdir);
803                        } else {
804                            @cmd = ("hg", "clone", $repo_url, $tempdir);
805                        }
806                        print "@cmd ...\n";
807                        my $rc = system(@cmd);
808                        return ($rc >> 8) if $rc != 0;
809                        print "\n";
810                    }
811                }
812                if (copy "$tempdir/$file", $dir) {
813                    print "Copied $file to $dir\n";
814                } else {
815                    print STDERR "Failed to copy $file to $dir: $!\n";
816                    return 1;
817                }
818            }
819            when ("git") {
820                # If there isn't a browse URL (either because the package
821                # doesn't ship one, or because we were called with a URL,
822                # try a common pattern for gitweb
823                if (!length($browse_url)) {
824                    if ($repo_url =~ m%^\w+://([^/]+)/(?:git/)?(.*)$%) {
825                        $browse_url = "http://$1/?p=$2";
826                    }
827                }
828                if (have_lwp and $browse_url =~ /^http/) {
829                    $escaped_file =~ s|/|%2F|g;
830
831                    print "Attempting to retrieve $file via HTTP ...\n";
832
833                    init_agent() unless $ua;
834                    my $file_url = "$browse_url;a=blob_plain";
835                    $file_url .= ";f=$escaped_file;hb=HEAD";
836                    my $request  = HTTP::Request->new('GET', $file_url);
837                    my $response = $ua->request($request);
838                    my $error    = 0;
839                    if (!$response->is_success) {
840                        if ($browse_url =~ /\.git$/) {
841                            print "Error retrieving file: "
842                              . $response->status_line . "\n";
843                            $error = 1;
844                        } else {
845                            $browse_url .= ".git";
846                            $file_url = "$browse_url;a=blob_plain";
847                            $file_url .= ";f=$escaped_file;hb=HEAD";
848                            $request  = HTTP::Request->new('GET', $file_url);
849                            $response = $ua->request($request);
850                            if (!$response->is_success) {
851                                print "Error retrieving file: "
852                                  . $response->status_line . "\n";
853                                $error = 1;
854                            }
855                        }
856                    }
857                    if (!$error) {
858                        if (!open OUTPUT, ">", $dir . "/" . basename($file)) {
859                            print STDERR "Failed to create output file "
860                              . basename($file) . " $!\n";
861                            return 1;
862                        }
863                        print "Writing to $dir/" . basename($file) . " ... \n";
864                        print OUTPUT $response->content;
865                        close OUTPUT;
866                        $fetched = 1;
867                    }
868                }
869                if ($fetched == 0) {
870                    # If we've already retrieved a copy of the repository,
871                    # reuse it
872                    if (!length($tempdir)) {
873                        if (
874                            !(
875                                $tempdir = tempdir(
876                                    "debcheckoutXXXX",
877                                    TMPDIR  => 1,
878                                    CLEANUP => 1
879                                ))
880                        ) {
881                            print STDERR
882                              "Failed to create temporary directory . $!\n";
883                            return 1;
884                        }
885                        # Since git won't clone in to a directory that
886                        # already exists...
887                        $tempdir .= "/repo";
888                        # Can't shallow clone from an http:: URL
889                        $repo_url =~ s/^http/git/;
890                        @cmd = (
891                            "git", "clone", "--depth", "1", $repo_url,
892                            "$tempdir"
893                        );
894                        print "@cmd ...\n\n";
895                        my $rc = system(@cmd);
896                        return ($rc >> 8) if $rc != 0;
897                        print "\n";
898                    }
899
900                    my $oldcwd = getcwd();
901                    chdir $tempdir;
902
903                    @cmd = ($repo_type, "show", "HEAD:$file");
904                    print "@cmd ... > $dir/" . basename($file) . "\n";
905                    if (!open CAT, '-|', @cmd) {
906                        print STDERR "Failed to execute @cmd $!\n";
907                        chdir $oldcwd;
908                        return 1;
909                    }
910                    chdir $oldcwd;
911                    local $/;
912                    my $content = <CAT>;
913                    close CAT;
914                    if (!open OUTPUT, ">", $dir . "/" . basename($file)) {
915                        print STDERR "Failed to create output file "
916                          . basename($file) . " $!\n";
917                        return 1;
918                    }
919                    print OUTPUT $content;
920                    close OUTPUT;
921                }
922            }
923            default {
924                die "unsupported version control system '$repo_type'.\n";
925            }
926        }
927    }
928
929    # If we've got this far, all the files were retrieved successfully
930    return 0;
931}
932
933# download source package, unpack it, and merge its contents into the checkout
934sub unpack_source($$$$$) {
935    my ($pkg, $version, $destdir, $origtgz_name, $unpack_source) = @_;
936
937    return 1 if ($unpack_source eq 'never');
938    return 1
939      if (defined $origtgz_name and $origtgz_name eq '')
940      ;    # only really relevant with URL on command line
941
942    $destdir ||= $pkg;
943    # Apt will auto-resolve binary package names to source package names.  We
944    # need to know the source package name to correctly identify the source
945    # package artifacts (dsc, orig.tar.*, etc)
946    (my $srcpkg = $origtgz_name) =~ s/_.*//;
947    # is this a debian-dir-only repository?
948    unless (-d $destdir) {
949        print STDERR
950"debcheckout did not create the $destdir directory - this is probably a bug\n";
951        return 0;
952    }
953    my @repo_files  = glob "$destdir/*";
954    my $debian_only = 0;
955    if (@repo_files == 1 and $repo_files[0] eq "$destdir/debian") {
956        $debian_only = 1;
957    }
958
959    return 1 if ($unpack_source eq 'auto' and not $debian_only);
960    if ($unpack_source ne 'download-only' and $debian_only) {
961        print
962"repository only contains the debian directory, using apt-get source\n";
963    }
964
965    my $tmpdir = File::Temp->newdir(DIR => ".");
966
967    # unpack
968    my $oldcwd = getcwd();
969    chdir $tmpdir;
970    my @args = ('source');
971    push @args, '--download-only'
972      if ($unpack_source eq 'download-only' or not $debian_only);
973    push @args, $version ? "$srcpkg=$version" : $srcpkg;
974    system('apt-get', @args);
975    chdir $oldcwd;
976
977    if (errorcode()) {
978        print STDERR "apt-get source failed\n";
979        return 0;
980    }
981
982    # put source package in place
983    foreach my $sourcefile (glob "$tmpdir/${srcpkg}_*") {
984        next unless (-f $sourcefile);    # skip directories
985        my $base = $sourcefile;
986        $base =~ s!.*/!!;
987        rename $sourcefile, $base or die "rename $sourcefile $base: $!";
988    }
989
990    return 1 if ($unpack_source eq 'download-only' or not $debian_only);
991
992    # figure out which directory was created
993    my @dirs = glob "$tmpdir/$srcpkg-*/";
994    unless (@dirs) {
995        print STDERR
996          "apt-get source did not create any $tmpdir/$srcpkg-* directory\n";
997        return 0;
998    }
999    my $directory = $dirs[0];
1000    chop $directory;
1001
1002    # move all files over, except the debian directory
1003    opendir DIR, $directory or die "opendir $directory: $!";
1004    foreach my $file (readdir DIR) {
1005        if ($file eq 'debian') {
1006            system('rm', '-rf', "$directory/$file");
1007        } elsif ($file eq '.' or $file eq '..') {
1008            next;
1009        } else {
1010            rename "$directory/$file", "$destdir/$file"
1011              or die "rename $directory/$file $destdir/$file: $!";
1012        }
1013    }
1014    closedir DIR;
1015    rmdir $directory or die "rmdir $directory: $!";
1016
1017    # $tmpdir is automatically removed
1018    return 1;
1019}
1020
1021# Print information about a repository and quit.
1022sub print_repo($$) {
1023    my ($repo_type, $repo_url) = @_;
1024
1025    print "$repo_type\t$repo_url\n";
1026    exit(0);
1027}
1028
1029sub git_ls_remote($$) {
1030    my ($url, $prefix) = @_;
1031
1032    $url =~ s|\s+-b\s+.*||;
1033    my $cmd = "git ls-remote '$url'";
1034    $cmd .= " '$prefix/*'" if length $prefix;
1035    open GIT, "$cmd |" or die "can't execute $cmd\n";
1036    my @refs;
1037    while (my $line = <GIT>) {
1038        chomp $line;
1039        my ($sha1, $name) = split /\s+/, $line;
1040        my $ref = $name;
1041        $ref = substr($ref, length($prefix) + 1) if length $prefix;
1042        push @refs, $ref;
1043    }
1044    close GIT;
1045    return @refs;
1046}
1047
1048# Given a GIT repository URL, extract its topgit info (if any), see
1049# the "topgit" package for more information
1050sub tg_info($) {
1051    my ($url) = @_;
1052
1053    my %info;
1054    $info{'topgit'}    = 'no';
1055    $info{'top-bases'} = '';
1056    my @bases = git_ls_remote($url, 'refs/top-bases');
1057    if (@bases) {
1058        $info{'topgit'}    = 'yes';
1059        $info{'top-bases'} = join ' ', @bases;
1060    }
1061    return (\%info);
1062}
1063
1064# Print details about a repository and quit.
1065sub print_details($$) {
1066    my ($repo_type, $repo_url) = @_;
1067
1068    print "type\t$repo_type\n";
1069    print "url\t$repo_url\n";
1070    if ($repo_type eq "git") {
1071        my $tg_info = tg_info($repo_url);
1072        while (my ($k, $v) = each %$tg_info) {
1073            print "$k\t$v\n";
1074        }
1075    }
1076    exit(0);
1077}
1078
1079sub guess_repo_type($$) {
1080    my ($repo_url, $default) = @_;
1081    my $repo_type = $default;
1082    if ($repo_url =~ /^(git|svn|bzr)(\+ssh)?:/) {
1083        $repo_type = $1;
1084    } elsif ($repo_url =~ /^https?:\/\/(svn|git|hg|bzr|darcs)\.debian\.org/) {
1085        $repo_type = $1;
1086    } elsif (
1087        $repo_url =~ m@^https?://anonscm.debian.org/(svn|c?git|hg|bzr|darcs)/@)
1088    {
1089        $repo_type = $1;
1090        $repo_type =~ s/cgit/git/;
1091    }
1092    return $repo_type;
1093}
1094
1095# Does a given string match the lexical rules for package names?
1096sub is_package($) {
1097    my ($arg) = @_;
1098
1099    return ($arg =~ /^[a-z0-9.+-]+$/);    # lexical rule for package names
1100}
1101
1102sub main() {
1103    my $auth    = 0;                      # authenticated mode
1104    my $destdir = "";                     # destination directory
1105    my $pkg     = "";                     # package name
1106    my $version = "";                     # package version
1107    my $origtgz_name
1108      = undef;    # orig.tar.gz name (or "" when none; undef means unknown)
1109    my $print_mode   = 0;   # print only mode
1110    my $details_mode = 0;   # details only mode
1111    my $use_package  = '';  # use this package instead of guessing from the URL
1112    my $repo_type = "git";  # default repo typo, overridden by '-t'
1113    my $repo_url  = "";     # repository URL
1114    my $anon_repo_url;      # repository URL (before auth mangling)
1115    my $user       = "";    # login name (authenticated mode only)
1116    my $browse_url = "";    # online browsable repository URL
1117    my $git_track  = "";    # list of remote GIT branches to --track
1118    my $unpack_source
1119      = $config_vars{DEBCHECKOUT_SOURCE};    # retrieve and unpack orig.tar.gz
1120    GetOptions(
1121        "auth|a"      => \$auth,
1122        "help|h"      => sub { pod2usage({ -exitval => 0, -verbose => 1 }); },
1123        "print|p"     => \$print_mode,
1124        "details|d"   => \$details_mode,
1125        "package|P=s" => \$use_package,
1126        "type|t=s"    => \$repo_type,
1127        "user|u=s"    => \$user,
1128        "file|f=s"    => sub { push(@files, $_[1]); },
1129        "git-track=s" => \$git_track,
1130        "source=s"    => \$unpack_source,
1131    ) or pod2usage({ -exitval => 3 });
1132    pod2usage({ -exitval => 3 }) if ($#ARGV < 0 or $#ARGV > 1);
1133    pod2usage({
1134            -exitval => 3,
1135            -message => "-d and -p are mutually exclusive.\n",
1136        }) if ($print_mode and $details_mode);
1137    my $dont_act = 1 if ($print_mode or $details_mode);
1138    pod2usage({
1139            -exitval => 3,
1140            -message =>
1141"--source argument must be one of never, auto, download-only, and always\n",
1142        }) unless ($unpack_source =~ /^(never|auto|download-only|always)$/);
1143
1144    # -u|--user implies -a|--auth
1145    $auth = 1 if length $user;
1146
1147    # set user from the config file to be used with -a|--auth without -u|--user
1148    $user = $config_vars{DEBCHECKOUT_USER} unless $user;
1149
1150    $destdir = $ARGV[1] if $#ARGV > 0;
1151    ($pkg, $version) = split(/=/, $ARGV[0]);
1152    $version ||= "";
1153
1154    if (not is_package($pkg)) {    # repo-url passed on the command line
1155        $repo_url  = $ARGV[0];
1156        $repo_type = guess_repo_type($repo_url, $repo_type);
1157        $pkg       = "";
1158        $version   = "";
1159        # when --package is given, use it
1160        if ($use_package) {
1161            $pkg = $use_package;
1162            # else guess package from url
1163        } elsif ($repo_url =~ m!/trunk/([a-z0-9.+-]+)!)
1164        {                          # svn with {trunk,tags,branches}/$pkg
1165            $pkg = $1;
1166        } elsif ($repo_url =~ m!([a-z0-9.+-]+)/trunk/?!)
1167        {                          # svn with $pkg/{trunk,tags,branches}
1168            $pkg = $1;
1169        } elsif ($repo_url =~ /([a-z0-9.+-]+)\.git(\s+-b\s+.*)?$/) {    # git
1170            $pkg = $1;
1171        } elsif ($repo_url =~ /([a-z0-9.+-]+)$/) {    # catch-all
1172            $pkg = $1;
1173        }
1174        $origtgz_name = $pkg
1175          ;  # FIXME: this should rather set srcpkg in unpack_source() directly
1176    } else {    # package name passed on the command line
1177        ($version, $repo_type, $repo_url, $origtgz_name)
1178          = find_repo($pkg, $version);
1179        unless ($repo_type) {
1180            my $vermsg = "";
1181            $vermsg = ", version $version" if length $version;
1182            print <<EOF;
1183No repository found for package $pkg$vermsg.
1184
1185A Vcs-* field is missing in its source record. See Debian Developer's
1186Reference 6.2.5:
1187 `https://www.debian.org/doc/developers-reference/best-pkging-practices.html#bpp-vcs'
1188If you know that the package is maintained via a version control
1189system consider asking the maintainer to expose such information.
1190
1191Nevertheless, you can get the sources of package $pkg
1192from the Debian archive executing:
1193
1194 apt-get source $pkg
1195
1196Note however that what you obtain will *not* be a local copy of
1197some version control system: your changes will not be preserved
1198and it will not be possible to commit them directly.
1199
1200EOF
1201            exit(1);
1202        }
1203        $browse_url = find_browse($pkg, $version) if @files;
1204    }
1205
1206    $repo_url = munge_url($repo_type, $repo_url);
1207    if ($auth and not @files) {
1208        $anon_repo_url = $repo_url;
1209        $repo_url      = set_auth($repo_type, $repo_url, $user, $dont_act);
1210    }
1211    print_repo($repo_type, $repo_url)    if $print_mode;      # ... then quit
1212    print_details($repo_type, $repo_url) if $details_mode;    # ... then quit
1213    if (length $pkg) {
1214        print "declared $repo_type repository at $repo_url\n";
1215        $destdir = $pkg unless length $destdir;
1216    }
1217    my $rc;
1218    if (@files) {
1219        $rc = checkout_files($repo_type, $repo_url, $destdir, $browse_url);
1220    } else {
1221        $rc = checkout_repo($repo_type, $repo_url, $destdir, $anon_repo_url);
1222    }    # XXX: there is no way to know for sure what is the destdir :-(
1223    die "checkout failed (the command above returned a non-zero exit code)\n"
1224      if $rc != 0;
1225
1226    # post-checkout actions
1227    if ($repo_type eq 'bzr' and $auth) {
1228        if (open B, '>>', "$destdir/.bzr/branch/branch.conf") {
1229            print B "\npush_location = $repo_url";
1230            close B;
1231        } else {
1232            print STDERR
1233              "failed to open branch.conf to add push_location: $!\n";
1234        }
1235    } elsif ($repo_type eq 'git') {
1236        my $tg_info = tg_info($repo_url);
1237        my $wcdir   = $destdir;
1238        # HACK: if $destdir is unknown, take last URL part and remove /.git$/
1239        $wcdir = (split m|\.|, (split m|/|, $repo_url)[-1])[0]
1240          unless length $wcdir;
1241        if ($$tg_info{'topgit'} eq 'yes') {
1242            print "TopGit detected, populating top-bases ...\n";
1243            system("cd $wcdir && tg remote --populate origin");
1244            $rc = errorcode();
1245            print STDERR "TopGit population failed\n" if $rc != 0;
1246        }
1247        system("cd $wcdir && git config user.name \"$ENV{'DEBFULLNAME'}\"")
1248          if (defined($ENV{'DEBFULLNAME'}));
1249        system("cd $wcdir && git config user.email \"$ENV{'DEBEMAIL'}\"")
1250          if (defined($ENV{'DEBEMAIL'}));
1251        if (length $git_track) {
1252            my @heads;
1253            if ($git_track eq '*') {
1254                @heads = git_ls_remote($repo_url, 'refs/heads');
1255            } else {
1256                @heads = split ' ', $git_track;
1257            }
1258            # Filter out any branches already populated via TopGit
1259            my @tgheads = split ' ', $$tg_info{'top-bases'};
1260            my $master  = 'master';
1261            if (
1262                open(HEAD,
1263                    "env GIT_DIR=\"$wcdir/.git\" git symbolic-ref HEAD |"
1264                )
1265            ) {
1266                $master = <HEAD>;
1267                chomp $master;
1268                $master =~ s@refs/heads/@@;
1269            }
1270            close(HEAD);
1271            foreach my $head (@heads) {
1272                next if $head eq $master;
1273                next if grep { $head eq $_ } @tgheads;
1274                my $cmd = "cd $wcdir";
1275                $cmd .= " && git branch --track $head remotes/origin/$head";
1276                system($cmd);
1277            }
1278        }
1279    } elsif ($repo_type eq 'hg') {
1280        my $username = '';
1281        $username .= " $ENV{'DEBFULLNAME'}" if (defined($ENV{'DEBFULLNAME'}));
1282        $username .= " <$ENV{'DEBEMAIL'}>"  if (defined($ENV{'DEBEMAIL'}));
1283        if ($username) {
1284            if (open(HGRC, '>>', "$destdir/.hg/hgrc")) {
1285                print HGRC "[ui]\nusername =$username\n";
1286                close HGRC;
1287            } else {
1288                print STDERR "failed to open hgrc to set username: $!\n";
1289            }
1290        }
1291    }
1292    die "post-checkout action failed\n"
1293      if $rc != 0;
1294
1295    if ($unpack_source) {
1296        unless ($pkg) {
1297            print STDERR
1298              "could not determine package name for orig.tar.gz retrieval\n";
1299            $rc ||= 1;
1300            exit($rc);
1301        }
1302        unpack_source($pkg, $version, $destdir, $origtgz_name, $unpack_source)
1303          or $rc = 1;
1304    }
1305
1306    exit($rc);
1307}
1308
1309main();
1310
1311# vim:sw=4
1312