1# Copyright © 2009 Raphaël Hertzog <hertzog@debian.org>
2# Copyright © 2012-2013 Guillem Jover <guillem@debian.org>
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17package Dpkg::Changelog::Entry::Debian;
18
19use strict;
20use warnings;
21
22our $VERSION = '1.03';
23our @EXPORT_OK = qw(
24    $regex_header
25    $regex_trailer
26    match_header
27    match_trailer
28    find_closes
29);
30
31use Exporter qw(import);
32use Time::Piece;
33
34use Dpkg::Gettext;
35use Dpkg::Control::Fields;
36use Dpkg::Control::Changelog;
37use Dpkg::Changelog::Entry;
38use Dpkg::Version;
39
40use parent qw(Dpkg::Changelog::Entry);
41
42=encoding utf8
43
44=head1 NAME
45
46Dpkg::Changelog::Entry::Debian - represents a Debian changelog entry
47
48=head1 DESCRIPTION
49
50This object represents a Debian changelog entry. It implements the
51generic interface Dpkg::Changelog::Entry. Only functions specific to this
52implementation are described below.
53
54=cut
55
56my $name_chars = qr/[-+0-9a-z.]/i;
57
58# XXX: Backwards compatibility, stop exporting on VERSION 2.00.
59## no critic (Variables::ProhibitPackageVars)
60
61# The matched content is the source package name ($1), the version ($2),
62# the target distributions ($3) and the options on the rest of the line ($4).
63our $regex_header = qr{
64    ^
65    (\w$name_chars*)                    # Package name
66    \ \(([^\(\) \t]+)\)                 # Package version
67    ((?:\s+$name_chars+)+)              # Target distribution
68    \;                                  # Separator
69    (.*?)                               # Key=Value options
70    \s*$                                # Trailing space
71}xi;
72
73# The matched content is the maintainer name ($1), its email ($2),
74# some blanks ($3) and the timestamp ($4), which is decomposed into
75# day of week ($6), date-time ($7) and this into month name ($8).
76our $regex_trailer = qr<
77    ^
78    \ \-\-                              # Trailer marker
79    \ (.*)                              # Maintainer name
80    \ \<(.*)\>                          # Maintainer email
81    (\ \ ?)                             # Blanks
82    (
83      ((\w+)\,\s*)?                     # Day of week (abbreviated)
84      (
85        \d{1,2}\s+                      # Day of month
86        (\w+)\s+                        # Month name (abbreviated)
87        \d{4}\s+                        # Year
88        \d{1,2}:\d\d:\d\d\s+[-+]\d{4}   # ISO 8601 date
89      )
90    )
91    \s*$                                # Trailing space
92>xo;
93
94my %week_day = map { $_ => 1 } qw(Mon Tue Wed Thu Fri Sat Sun);
95my %month_abbrev = map { $_ => 1 } qw(
96    Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
97);
98my %month_name = map { $_ => } qw(
99    January February March April May June July
100    August September October November December
101);
102
103## use critic
104
105=head1 METHODS
106
107=over 4
108
109=item @items = $entry->get_change_items()
110
111Return a list of change items. Each item contains at least one line.
112A change line starting with an asterisk denotes the start of a new item.
113Any change line like "C<[ Raphaël Hertzog ]>" is treated like an item of its
114own even if it starts a set of items attributed to this person (the
115following line necessarily starts a new item).
116
117=cut
118
119sub get_change_items {
120    my $self = shift;
121    my (@items, @blanks, $item);
122    foreach my $line (@{$self->get_part('changes')}) {
123	if ($line =~ /^\s*\*/) {
124	    push @items, $item if defined $item;
125	    $item = "$line\n";
126	} elsif ($line =~ /^\s*\[\s[^\]]+\s\]\s*$/) {
127	    push @items, $item if defined $item;
128	    push @items, "$line\n";
129	    $item = undef;
130	    @blanks = ();
131	} elsif ($line =~ /^\s*$/) {
132	    push @blanks, "$line\n";
133	} else {
134	    if (defined $item) {
135		$item .= "@blanks$line\n";
136	    } else {
137		$item = "$line\n";
138	    }
139	    @blanks = ();
140	}
141    }
142    push @items, $item if defined $item;
143    return @items;
144}
145
146=item @errors = $entry->parse_header()
147
148=item @errors = $entry->parse_trailer()
149
150Return a list of errors. Each item in the list is an error message
151describing the problem. If the empty list is returned, no errors
152have been found.
153
154=cut
155
156sub parse_header {
157    my $self = shift;
158    my @errors;
159    if (defined($self->{header}) and $self->{header} =~ $regex_header) {
160	$self->{header_source} = $1;
161
162	my $version = Dpkg::Version->new($2);
163	my ($ok, $msg) = version_check($version);
164	if ($ok) {
165	    $self->{header_version} = $version;
166	} else {
167	    push @errors, sprintf(g_("version '%s' is invalid: %s"), $version, $msg);
168	}
169
170	@{$self->{header_dists}} = split ' ', $3;
171
172	my $options = $4;
173	$options =~ s/^\s+//;
174	my $f = Dpkg::Control::Changelog->new();
175	foreach my $opt (split(/\s*,\s*/, $options)) {
176	    unless ($opt =~ m/^([-0-9a-z]+)\=\s*(.*\S)$/i) {
177		push @errors, sprintf(g_("bad key-value after ';': '%s'"), $opt);
178		next;
179	    }
180	    my ($k, $v) = (field_capitalize($1), $2);
181	    if (exists $f->{$k}) {
182		push @errors, sprintf(g_('repeated key-value %s'), $k);
183	    } else {
184		$f->{$k} = $v;
185	    }
186	    if ($k eq 'Urgency') {
187		push @errors, sprintf(g_('badly formatted urgency value: %s'), $v)
188		    unless ($v =~ m/^([-0-9a-z]+)((\s+.*)?)$/i);
189	    } elsif ($k eq 'Binary-Only') {
190		push @errors, sprintf(g_('bad binary-only value: %s'), $v)
191		    unless ($v eq 'yes');
192	    } elsif ($k =~ m/^X[BCS]+-/i) {
193	    } else {
194		push @errors, sprintf(g_('unknown key-value %s'), $k);
195	    }
196	}
197	$self->{header_fields} = $f;
198    } else {
199	push @errors, g_("the header doesn't match the expected regex");
200    }
201    return @errors;
202}
203
204sub parse_trailer {
205    my $self = shift;
206    my @errors;
207    if (defined($self->{trailer}) and $self->{trailer} =~ $regex_trailer) {
208	$self->{trailer_maintainer} = "$1 <$2>";
209
210	if ($3 ne '  ') {
211	    push @errors, g_('badly formatted trailer line');
212	}
213
214	# Validate the week day. Date::Parse used to ignore it, but Time::Piece
215	# is much more strict and it does not gracefully handle bogus values.
216	if (defined $5 and not exists $week_day{$6}) {
217	    push @errors, sprintf(g_('ignoring invalid week day \'%s\''), $6);
218	}
219
220	# Ignore the week day ('%a, '), as we have validated it above.
221	local $ENV{LC_ALL} = 'C';
222	eval {
223	    my $tp = Time::Piece->strptime($7, '%d %b %Y %T %z');
224	    $self->{trailer_timepiece} = $tp;
225	} or do {
226	    # Validate the month. Date::Parse used to accept both abbreviated
227	    # and full months, but Time::Piece strptime() implementation only
228	    # matches the abbreviated one with %b, which is what we want anyway.
229	    if (not exists $month_abbrev{$8}) {
230	        # We have to nest the conditionals because May is the same in
231	        # full and abbreviated forms!
232	        if (exists $month_name{$8}) {
233	            push @errors, sprintf(g_('uses full instead of abbreviated month name \'%s\''),
234	                                  $8, $month_name{$8});
235	        } else {
236	            push @errors, sprintf(g_('invalid abbreviated month name \'%s\''), $8);
237	        }
238	    }
239	    push @errors, sprintf(g_("cannot parse non-conformant date '%s'"), $7);
240	};
241	$self->{trailer_timestamp_date} = $4;
242    } else {
243	push @errors, g_("the trailer doesn't match the expected regex");
244    }
245    return @errors;
246}
247
248=item $entry->check_header()
249
250Obsolete method. Use parse_header() instead.
251
252=cut
253
254sub check_header {
255    my $self = shift;
256
257    warnings::warnif('deprecated',
258                     'obsolete check_header(), use parse_header() instead');
259
260    return $self->parse_header();
261}
262
263=item $entry->check_trailer()
264
265Obsolete method. Use parse_trailer() instead.
266
267=cut
268
269sub check_trailer {
270    my $self = shift;
271
272    warnings::warnif('deprecated',
273                     'obsolete check_trailer(), use parse_trailer() instead');
274
275    return $self->parse_header();
276}
277
278=item $entry->normalize()
279
280Normalize the content. Strip whitespaces at end of lines, use a single
281empty line to separate each part.
282
283=cut
284
285sub normalize {
286    my $self = shift;
287    $self->SUPER::normalize();
288    #XXX: recreate header/trailer
289}
290
291=item $src = $entry->get_source()
292
293Return the name of the source package associated to the changelog entry.
294
295=cut
296
297sub get_source {
298    my $self = shift;
299
300    return $self->{header_source};
301}
302
303=item $ver = $entry->get_version()
304
305Return the version associated to the changelog entry.
306
307=cut
308
309sub get_version {
310    my $self = shift;
311
312    return $self->{header_version};
313}
314
315=item @dists = $entry->get_distributions()
316
317Return a list of target distributions for this version.
318
319=cut
320
321sub get_distributions {
322    my $self = shift;
323
324    if (defined $self->{header_dists}) {
325        return @{$self->{header_dists}} if wantarray;
326        return $self->{header_dists}[0];
327    }
328    return;
329}
330
331=item $fields = $entry->get_optional_fields()
332
333Return a set of optional fields exposed by the changelog entry.
334It always returns a Dpkg::Control object (possibly empty though).
335
336=cut
337
338sub get_optional_fields {
339    my $self = shift;
340    my $f;
341
342    if (defined $self->{header_fields}) {
343        $f = $self->{header_fields};
344    } else {
345        $f = Dpkg::Control::Changelog->new();
346    }
347
348    my @closes = find_closes(join("\n", @{$self->{changes}}));
349    if (@closes) {
350	$f->{Closes} = join(' ', @closes);
351    }
352
353    return $f;
354}
355
356=item $urgency = $entry->get_urgency()
357
358Return the urgency of the associated upload.
359
360=cut
361
362sub get_urgency {
363    my $self = shift;
364    my $f = $self->get_optional_fields();
365    if (exists $f->{Urgency}) {
366	$f->{Urgency} =~ s/\s.*$//;
367	return lc($f->{Urgency});
368    }
369    return;
370}
371
372=item $maint = $entry->get_maintainer()
373
374Return the string identifying the person who signed this changelog entry.
375
376=cut
377
378sub get_maintainer {
379    my $self = shift;
380
381    return $self->{trailer_maintainer};
382}
383
384=item $time = $entry->get_timestamp()
385
386Return the timestamp of the changelog entry.
387
388=cut
389
390sub get_timestamp {
391    my $self = shift;
392
393    return $self->{trailer_timestamp_date};
394}
395
396=item $time = $entry->get_timepiece()
397
398Return the timestamp of the changelog entry as a Time::Piece object.
399
400This function might return undef if there was no timestamp.
401
402=cut
403
404sub get_timepiece {
405    my $self = shift;
406
407    return $self->{trailer_timepiece};
408}
409
410=back
411
412=head1 UTILITY FUNCTIONS
413
414=over 4
415
416=item $bool = match_header($line)
417
418Checks if the line matches a valid changelog header line.
419
420=cut
421
422sub match_header {
423    my $line = shift;
424
425    return $line =~ /$regex_header/;
426}
427
428=item $bool = match_trailer($line)
429
430Checks if the line matches a valid changelog trailing line.
431
432=cut
433
434sub match_trailer {
435    my $line = shift;
436
437    return $line =~ /$regex_trailer/;
438}
439
440=item @closed_bugs = find_closes($changes)
441
442Takes one string as argument and finds "Closes: #123456, #654321" statements
443as supported by the Debian Archive software in it. Returns all closed bug
444numbers in an array.
445
446=cut
447
448sub find_closes {
449    my $changes = shift;
450    my %closes;
451
452    while ($changes && ($changes =~ m{
453               closes:\s*
454               (?:bug)?\#?\s?\d+
455               (?:,\s*(?:bug)?\#?\s?\d+)*
456           }pigx)) {
457        $closes{$_} = 1 foreach (${^MATCH} =~ /\#?\s?(\d+)/g);
458    }
459
460    my @closes = sort { $a <=> $b } keys %closes;
461    return @closes;
462}
463
464=back
465
466=head1 CHANGES
467
468=head2 Version 1.03 (dpkg 1.18.8)
469
470New methods: $entry->get_timepiece().
471
472=head2 Version 1.02 (dpkg 1.18.5)
473
474New methods: $entry->parse_header(), $entry->parse_trailer().
475
476Deprecated methods: $entry->check_header(), $entry->check_trailer().
477
478=head2 Version 1.01 (dpkg 1.17.2)
479
480New functions: match_header(), match_trailer()
481
482Deprecated variables: $regex_header, $regex_trailer
483
484=head2 Version 1.00 (dpkg 1.15.6)
485
486Mark the module as public.
487
488=cut
489
4901;
491