1#!/usr/local/bin/perl
2
3## tar archive frontend for git-fast-import
4##
5## For example:
6##
7##  mkdir project; cd project; git init
8##  perl import-tars.perl *.tar.bz2
9##  git whatchanged import-tars
10##
## Use --metainfo to specify the extension of a metadata file from which
## import-tars reads the commit message and, optionally, author and
## committer information.
14##
15##  echo 'This is the commit message' > myfile.tar.bz2.msg
16##  perl import-tars.perl --metainfo=msg myfile.tar.bz2
17
18use strict;
19use Getopt::Long;
20
# Extension of the optional per-tarball metadata file; '' means "none".
my $metaext = '';

# Options must parse and at least one tarball must remain on the command line.
unless (GetOptions('metainfo=s' => \$metaext) && @ARGV) {
	die "usage: import-tars [--metainfo=extension] *.tar.{gz,bz2,lzma,xz,Z}\n";
}

# Every tarball is committed onto this single branch.
my $branch_name = 'import-tars';
my $branch_ref = 'refs/heads/' . $branch_name;

# Author identity: taken from the environment, else a fixed placeholder.
my $author_name = $ENV{GIT_AUTHOR_NAME}
	? $ENV{GIT_AUTHOR_NAME}
	: 'T Ar Creator';
my $author_email = $ENV{GIT_AUTHOR_EMAIL}
	? $ENV{GIT_AUTHOR_EMAIL}
	: 'tar@example.com';

# Committer identity: taken from the environment, else asked from git config.
my $committer_name = $ENV{GIT_COMMITTER_NAME};
$committer_name = `git config --get user.name` unless $committer_name;
my $committer_email = $ENV{GIT_COMMITTER_EMAIL};
$committer_email = `git config --get user.email` unless $committer_email;

# Drop the trailing newline left by the backtick output.
chomp $committer_name;
chomp $committer_email;

# FI is the pipe into git fast-import that receives the whole import stream.
unless (open(FI, '|-', 'git', 'fast-import', '--quiet')) {
	die "Unable to start git fast-import: $!\n";
}
# Template for the 512-byte tar header block.  Only the fields used below
# are extracted, in this order:
#   name, mode, size, mtime, typeflag, linkname, prefix
# The x-counts skip uid+gid (16 bytes), chksum (8 bytes) and the
# magic/version/uname/gname/devmajor/devminor run (88 bytes).
my $header_format = 'Z100 Z8 x16 Z12 Z12 x8 Z1 Z100 x88 Z*';

# Blob contents are raw bytes; make sure no I/O layer translates them.
binmode FI;

# Import each tarball named on the command line as one commit on
# $branch_ref, followed by a tag named after the tarball.
foreach my $tar_file (@ARGV)
{
	my $commit_time = time;

	# The tag name is the tarball's basename minus its archive suffix.
	# A failed match leaves '' so that we fall through to the
	# "Unrecognized compression format" error below.
	my ($tar_name) = $tar_file =~ m,([^/]+)$,;
	$tar_name = '' unless defined $tar_name;

	# Open the (decompressed) tar stream, choosing the tool by suffix.
	my $in;
	if ($tar_name =~ s/\.(tar\.gz|tgz)$//) {
		open($in, '-|', 'gunzip', '-c', $tar_file)
			or die "Unable to gunzip -c $tar_file: $!\n";
	} elsif ($tar_name =~ s/\.(tar\.bz2|tbz2)$//) {
		open($in, '-|', 'bunzip2', '-c', $tar_file)
			or die "Unable to bunzip2 -c $tar_file: $!\n";
	} elsif ($tar_name =~ s/\.tar\.Z$//) {
		open($in, '-|', 'uncompress', '-c', $tar_file)
			or die "Unable to uncompress -c $tar_file: $!\n";
	} elsif ($tar_name =~ s/\.(tar\.(lzma|xz)|(tlz|txz))$//) {
		open($in, '-|', 'xz', '-dc', $tar_file)
			or die "Unable to xz -dc $tar_file: $!\n";
	} elsif ($tar_name =~ s/\.tar$//) {
		# Three-argument open: the file name can never be taken as a mode.
		open($in, '<', $tar_file)
			or die "Unable to open $tar_file: $!\n";
	} else {
		die "Unrecognized compression format: $tar_file\n";
	}
	binmode $in;

	my $author_time = 0;	# newest mtime seen in the archive
	my $next_mark = 1;	# next fast-import mark to hand out
	my $have_top_dir = 1;	# true while all paths share one top directory
	my ($top_dir, %files);	# %files: path => [ mark, mode ]

	# Path taken from a pax extended header, applied to the next entry.
	my $next_path = '';

	my $block;
	while (read($in, $block, 512) == 512) {
		my ($name, $mode, $size, $mtime, $typeflag, $linkname,
			$prefix) = unpack $header_format, $block;

		unless ($next_path eq '') {
			# Recover name from previous extended header.  A pax
			# "path" replaces both the name and the prefix field.
			$name = $next_path;
			$prefix = '';
			$next_path = '';
		}

		# An empty name marks the end of the archive.
		last unless length($name);
		if ($name eq '././@LongLink') {
			# GNU tar extension: the next block carries the real
			# name and the block after it the real header.  Only
			# the first 257 bytes of that name block are used.
			if (read($in, $block, 512) != 512) {
				die ('Short archive');
			}
			$name = unpack 'Z257', $block;
			next unless $name;

			if (read($in, $block, 512) != 512) {
				die ('Short archive');
			}
			(undef, $mode, $size, $mtime, $typeflag, $linkname,
				$prefix) = unpack $header_format, $block;
		}
		$mode = oct $mode;
		$size = oct $size;
		$mtime = oct $mtime;

		# typeflag is a single character and may be a letter, so it is
		# always compared as a string.
		next if $typeflag eq '5'; # directory

		if ($typeflag eq 'x' || $typeflag eq 'g') {
			# pax extended ('x') or global ('g') header: always
			# consume the payload so the stream stays in sync.
			my $pax_header = '';
			while ($size > 0) {
				if (read($in, $block, 512) != 512) {
					die ('Short archive');
				}
				$pax_header .= substr($block, 0, $size);
				$size -= 512;
			}
			next if $typeflag eq 'g'; # ignore global header

			# Records look like "<length> <key>=<value>".  The
			# split limits keep spaces and '=' inside the value.
			foreach my $record (split /\n/, $pax_header) {
				my (undef, $entry) = split / /, $record, 2;
				next unless defined $entry;
				my ($key, $value) = split /=/, $entry, 2;
				next unless defined $value;
				$next_path = $value if $key eq 'path';
			}
			next;
		} elsif ($name =~ m{/\z}) { # directory
			next;
		} elsif ($typeflag ne '1') { # handle hard links later
			print FI "blob\n", "mark :$next_mark\n";
			if ($typeflag eq '2') { # symbolic link
				print FI "data ", length($linkname), "\n",
					$linkname;
				$mode = 0120000;
			} else {
				print FI "data $size\n";
				# Exactly $size bytes were promised above, so a
				# truncated archive must abort the import.
				while ($size > 0) {
					if (read($in, $block, 512) != 512) {
						die ('Short archive');
					}
					print FI substr($block, 0, $size);
					$size -= 512;
				}
			}
			print FI "\n";
		}

		my $path = $prefix ne '' ? "$prefix/$name" : $name;

		if ($typeflag eq '1') { # hard link
			# Look the target up with the prefix applied first (the
			# historical behaviour of this script), then as stored.
			my $target;
			$target = $files{"$prefix/$linkname"} if $prefix ne '';
			$target = $files{$linkname} unless $target;
			die "Hard link $path refers to unknown file $linkname in $tar_file\n"
				unless $target;
			$files{$path} = [ $target->[0], $mode ];
		} else {
			$files{$path} = [$next_mark++, $mode];
		}

		$author_time = $mtime if $mtime > $author_time;

		# Track whether every path lives under one common top-level
		# directory.  A path without any directory part rules that out.
		my ($first_component) = $path =~ m,^([^/]+)/,;
		if (!defined $first_component) {
			$have_top_dir = 0;
		} else {
			$top_dir = $first_component unless defined $top_dir;
			$have_top_dir = 0 if $top_dir ne $first_component;
		}
	}

	my $commit_msg = "Imported from $tar_file.";
	my $this_committer_name = $committer_name;
	my $this_committer_email = $committer_email;
	my $this_author_name = $author_name;
	my $this_author_email = $author_email;
	if ($metaext ne '') {
		# Optionally read a commit message from <filename.tar>.msg
		# Add a line of the form "Committer: name <e-mail>" to override
		# the committer and "Author: name <e-mail>" to override the
		# author for this tar ball.
		if (open my $msg_fh, '<', "${tar_file}.${metaext}") {
			my $header_done = 0;
			$commit_msg = '';
			while (my $line = <$msg_fh>) {
				if (!$header_done && $line =~ /^Committer:\s+([^<>]*)\s+<(.*)>\s*$/i) {
					$this_committer_name = $1;
					$this_committer_email = $2;
				} elsif (!$header_done && $line =~ /^Author:\s+([^<>]*)\s+<(.*)>\s*$/i) {
					$this_author_name = $1;
					$this_author_email = $2;
				} elsif (!$header_done && $line =~ /^$/) { # empty line ends header.
					$header_done = 1;
				} else {
					$commit_msg .= $line;
					$header_done = 1;
				}
			}
			close $msg_fh;
		}
	}

	print FI <<EOF;
commit $branch_ref
author $this_author_name <$this_author_email> $author_time +0000
committer $this_committer_name <$this_committer_email> $commit_time +0000
data <<END_OF_COMMIT_MESSAGE
$commit_msg
END_OF_COMMIT_MESSAGE

deleteall
EOF

	# Emit the tree: strip the common top directory if there is one and
	# normalise permissions to 0755/0644 (symlinks keep 0120000).
	foreach my $path (keys %files)
	{
		my ($mark, $mode) = @{$files{$path}};
		$path =~ s,^([^/]+)/,, if $have_top_dir;
		$mode = $mode & 0111 ? 0755 : 0644 unless $mode == 0120000;
		printf FI "M %o :%i %s\n", $mode, $mark, $path;
	}
	print FI "\n";

	print FI <<EOF;
tag $tar_name
from $branch_ref
tagger $author_name <$author_email> $author_time +0000
data <<END_OF_TAG_MESSAGE
Package $tar_name
END_OF_TAG_MESSAGE

EOF

	# The result is deliberately not checked: we may stop reading before
	# the decompressor has written everything, which makes it exit
	# abnormally even though the import went fine.
	close $in;
}

# close() on a pipe waits for git fast-import and reports its failure.
unless (close FI) {
	die $!
		? "Error closing pipe to git fast-import: $!\n"
		: "git fast-import failed (wait status $?)\n";
}
228