1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4#
5# This Source Code Form is "Incompatible With Secondary Licenses", as
6# defined by the Mozilla Public License, v. 2.0.
7
8package Bugzilla::Util;
9
10use strict;
11
12use base qw(Exporter);
13@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed
14                             html_quote url_quote xml_quote
15                             css_class_quote html_light_quote
16                             i_am_cgi i_am_webservice correct_urlbase remote_ip
17                             validate_ip do_ssl_redirect_if_required use_attachbase
18                             diff_arrays on_main_db say
19                             trim wrap_hard wrap_comment find_wrap_point
20                             format_time validate_date validate_time datetime_from
21                             is_7bit_clean bz_crypt generate_random_password
22                             validate_email_syntax check_email_syntax clean_text
23                             get_text template_var disable_utf8
24                             detect_encoding
25                             join_activity_entries);
26
27use Bugzilla::Constants;
28use Bugzilla::RNG qw(irand);
29use Bugzilla::Error;
30
31use Date::Parse;
32use Date::Format;
33use Digest;
34use Email::Address;
35use List::Util qw(first);
36use Scalar::Util qw(tainted blessed);
37use Template::Filters;
38use Text::Wrap;
39use Encode qw(encode decode resolve_alias);
40use Encode::Guess;
41
42sub trick_taint {
43    require Carp;
44    Carp::confess("Undef to trick_taint") unless defined $_[0];
45    my $match = $_[0] =~ /^(.*)$/s;
46    $_[0] = $match ? $1 : undef;
47    return (defined($_[0]));
48}
49
50sub detaint_natural {
51    my $match = $_[0] =~ /^(\d+)$/;
52    $_[0] = $match ? int($1) : undef;
53    return (defined($_[0]));
54}
55
56sub detaint_signed {
57    my $match = $_[0] =~ /^([-+]?\d+)$/;
58    # The "int()" call removes any leading plus sign.
59    $_[0] = $match ? int($1) : undef;
60    return (defined($_[0]));
61}
62
63# Bug 120030: Override html filter to obscure the '@' in user
64#             visible strings.
65# Bug 319331: Handle BiDi disruptions.
66sub html_quote {
67    my ($var) = Template::Filters::html_filter(@_);
68    # Obscure '@'.
69    $var =~ s/\@/\@/g;
70    if (Bugzilla->params->{'utf8'}) {
71        # Remove control characters if the encoding is utf8.
72        # Other multibyte encodings may be using this range; so ignore if not utf8.
73        $var =~ s/(?![\t\r\n])[[:cntrl:]]//g;
74
75        # Remove the following characters because they're
76        # influencing BiDi:
77        # --------------------------------------------------------
78        # |Code  |Name                      |UTF-8 representation|
79        # |------|--------------------------|--------------------|
80        # |U+202a|Left-To-Right Embedding   |0xe2 0x80 0xaa      |
81        # |U+202b|Right-To-Left Embedding   |0xe2 0x80 0xab      |
82        # |U+202c|Pop Directional Formatting|0xe2 0x80 0xac      |
83        # |U+202d|Left-To-Right Override    |0xe2 0x80 0xad      |
84        # |U+202e|Right-To-Left Override    |0xe2 0x80 0xae      |
85        # --------------------------------------------------------
86        #
87        # The following are characters influencing BiDi, too, but
88        # they can be spared from filtering because they don't
89        # influence more than one character right or left:
90        # --------------------------------------------------------
91        # |Code  |Name                      |UTF-8 representation|
92        # |------|--------------------------|--------------------|
93        # |U+200e|Left-To-Right Mark        |0xe2 0x80 0x8e      |
94        # |U+200f|Right-To-Left Mark        |0xe2 0x80 0x8f      |
95        # --------------------------------------------------------
96        $var =~ s/[\x{202a}-\x{202e}]//g;
97    }
98    return $var;
99}
100
101sub html_light_quote {
102    my ($text) = @_;
103    # admin/table.html.tmpl calls |FILTER html_light| many times.
104    # There is no need to recreate the HTML::Scrubber object again and again.
105    my $scrubber = Bugzilla->process_cache->{html_scrubber};
106
107    # List of allowed HTML elements having no attributes.
108    my @allow = qw(b strong em i u p br abbr acronym ins del cite code var
109                   dfn samp kbd big small sub sup tt dd dt dl ul li ol
110                   fieldset legend);
111
112    if (!Bugzilla->feature('html_desc')) {
113        my $safe = join('|', @allow);
114        my $chr = chr(1);
115
116        # First, escape safe elements.
117        $text =~ s#<($safe)>#$chr$1$chr#go;
118        $text =~ s#</($safe)>#$chr/$1$chr#go;
119        # Now filter < and >.
120        $text =~ s#<#&lt;#g;
121        $text =~ s#>#&gt;#g;
122        # Restore safe elements.
123        $text =~ s#$chr/($safe)$chr#</$1>#go;
124        $text =~ s#$chr($safe)$chr#<$1>#go;
125        return $text;
126    }
127    elsif (!$scrubber) {
128        # We can be less restrictive. We can accept elements with attributes.
129        push(@allow, qw(a blockquote q span));
130
131        # Allowed protocols.
132        my $safe_protocols = join('|', SAFE_PROTOCOLS);
133        my $protocol_regexp = qr{(^(?:$safe_protocols):|^[^:]+$)}i;
134
135        # Deny all elements and attributes unless explicitly authorized.
136        my @default = (0 => {
137                             id    => 1,
138                             name  => 1,
139                             class => 1,
140                             '*'   => 0, # Reject all other attributes.
141                            }
142                       );
143
144        # Specific rules for allowed elements. If no specific rule is set
145        # for a given element, then the default is used.
146        my @rules = (a => {
147                           href   => $protocol_regexp,
148                           target => qr{^(?:_blank|_parent|_self|_top)$}i,
149                           title  => 1,
150                           id     => 1,
151                           name   => 1,
152                           class  => 1,
153                           '*'    => 0, # Reject all other attributes.
154                          },
155                     blockquote => {
156                                    cite => $protocol_regexp,
157                                    id    => 1,
158                                    name  => 1,
159                                    class => 1,
160                                    '*'  => 0, # Reject all other attributes.
161                                   },
162                     'q' => {
163                             cite => $protocol_regexp,
164                             id    => 1,
165                             name  => 1,
166                             class => 1,
167                             '*'  => 0, # Reject all other attributes.
168                          },
169                    );
170
171        Bugzilla->process_cache->{html_scrubber} = $scrubber =
172          HTML::Scrubber->new(default => \@default,
173                              allow   => \@allow,
174                              rules   => \@rules,
175                              comment => 0,
176                              process => 0);
177    }
178    return $scrubber->scrub($text);
179}
180
181sub email_filter {
182    my ($toencode) = @_;
183    if (!Bugzilla->user->id) {
184        my @emails = Email::Address->parse($toencode);
185        if (scalar @emails) {
186            my @hosts = map { quotemeta($_->host) } @emails;
187            my $hosts_re = join('|', @hosts);
188            $toencode =~ s/\@(?:$hosts_re)//g;
189            return $toencode;
190        }
191    }
192    return $toencode;
193}
194
195# This originally came from CGI.pm, by Lincoln D. Stein
196sub url_quote {
197    my ($toencode) = (@_);
198    utf8::encode($toencode) # The below regex works only on bytes
199        if Bugzilla->params->{'utf8'} && utf8::is_utf8($toencode);
200    $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
201    return $toencode;
202}
203
204sub css_class_quote {
205    my ($toencode) = (@_);
206    $toencode =~ s#[ /]#_#g;
207    $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("&#x%x;",ord($1))/eg;
208    return $toencode;
209}
210
211sub xml_quote {
212    my ($var) = (@_);
213    $var =~ s/\&/\&amp;/g;
214    $var =~ s/</\&lt;/g;
215    $var =~ s/>/\&gt;/g;
216    $var =~ s/\"/\&quot;/g;
217    $var =~ s/\'/\&apos;/g;
218
219    # the following nukes characters disallowed by the XML 1.0
220    # spec, Production 2.2. 1.0 declares that only the following
221    # are valid:
222    # (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF])
223    $var =~ s/([\x{0001}-\x{0008}]|
224               [\x{000B}-\x{000C}]|
225               [\x{000E}-\x{001F}]|
226               [\x{D800}-\x{DFFF}]|
227               [\x{FFFE}-\x{FFFF}])//gx;
228    return $var;
229}
230
231sub i_am_cgi {
232    # I use SERVER_SOFTWARE because it's required to be
233    # defined for all requests in the CGI spec.
234    return exists $ENV{'SERVER_SOFTWARE'} ? 1 : 0;
235}
236
237sub i_am_webservice {
238    my $usage_mode = Bugzilla->usage_mode;
239    return $usage_mode == USAGE_MODE_XMLRPC
240           || $usage_mode == USAGE_MODE_JSON;
241}
242
243# This exists as a separate function from Bugzilla::CGI::redirect_to_https
244# because we don't want to create a CGI object during XML-RPC calls
245# (doing so can mess up XML-RPC).
246sub do_ssl_redirect_if_required {
247    return if !i_am_cgi();
248    return if !Bugzilla->params->{'ssl_redirect'};
249
250    my $sslbase = Bugzilla->params->{'sslbase'};
251
252    # If we're already running under SSL, never redirect.
253    return if uc($ENV{HTTPS} || '') eq 'ON';
254    # Never redirect if there isn't an sslbase.
255    return if !$sslbase;
256    Bugzilla->cgi->redirect_to_https();
257}
258
259sub correct_urlbase {
260    my $ssl = Bugzilla->params->{'ssl_redirect'};
261    my $urlbase = Bugzilla->params->{'urlbase'};
262    my $sslbase = Bugzilla->params->{'sslbase'};
263
264    if (!$sslbase) {
265        return $urlbase;
266    }
267    elsif ($ssl) {
268        return $sslbase;
269    }
270    else {
271        # Return what the user currently uses.
272        return (uc($ENV{HTTPS} || '') eq 'ON') ? $sslbase : $urlbase;
273    }
274}
275
276sub remote_ip {
277    my $ip = $ENV{'REMOTE_ADDR'} || '127.0.0.1';
278    my @proxies = split(/[\s,]+/, Bugzilla->params->{'inbound_proxies'});
279
280    # If the IP address is one of our trusted proxies, then we look at
281    # the X-Forwarded-For header to determine the real remote IP address.
282    if ($ENV{'HTTP_X_FORWARDED_FOR'} && first { $_ eq $ip } @proxies) {
283        my @ips = split(/[\s,]+/, $ENV{'HTTP_X_FORWARDED_FOR'});
284        # This header can contain several IP addresses. We want the
285        # IP address of the machine which connected to our proxies as
286        # all other IP addresses may be fake or internal ones.
287        # Note that this may block a whole external proxy, but we have
288        # no way to determine if this proxy is malicious or trustable.
289        foreach my $remote_ip (reverse @ips) {
290            if (!first { $_ eq $remote_ip } @proxies) {
291                # Keep the original IP address if the remote IP is invalid.
292                $ip = validate_ip($remote_ip) || $ip;
293                last;
294            }
295        }
296    }
297    return $ip;
298}
299
300sub validate_ip {
301    my $ip = shift;
302    return is_ipv4($ip) || is_ipv6($ip);
303}
304
305# Copied from Data::Validate::IP::is_ipv4().
306sub is_ipv4 {
307    my $ip = shift;
308    return unless defined $ip;
309
310    my @octets = $ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/;
311    return unless scalar(@octets) == 4;
312
313    foreach my $octet (@octets) {
314        return unless ($octet >= 0 && $octet <= 255 && $octet !~ /^0\d{1,2}$/);
315    }
316
317    # The IP address is valid and can now be detainted.
318    return join('.', @octets);
319}
320
321# Copied from Data::Validate::IP::is_ipv6().
322sub is_ipv6 {
323    my $ip = shift;
324    return unless defined $ip;
325
326    # If there is a :: then there must be only one :: and the length
327    # can be variable. Without it, the length must be 8 groups.
328    my @chunks = split(':', $ip);
329
330    # Need to check if the last chunk is an IPv4 address, if it is we
331    # pop it off and exempt it from the normal IPv6 checking and stick
332    # it back on at the end. If there is only one chunk and it's an IPv4
333    # address, then it isn't an IPv6 address.
334    my $ipv4;
335    my $expected_chunks = 8;
336    if (@chunks > 1 && is_ipv4($chunks[$#chunks])) {
337        $ipv4 = pop(@chunks);
338        $expected_chunks--;
339    }
340
341    my $empty = 0;
342    # Workaround to handle trailing :: being valid.
343    if ($ip =~ /[0-9a-f]{1,4}::$/) {
344        $empty++;
345    # Single trailing ':' is invalid.
346    } elsif ($ip =~ /:$/) {
347        return;
348    }
349
350    foreach my $chunk (@chunks) {
351        return unless $chunk =~ /^[0-9a-f]{0,4}$/i;
352        $empty++ if $chunk eq '';
353    }
354    # More than one :: block is bad, but if it starts with :: it will
355    # look like two, so we need an exception.
356    if ($empty == 2 && $ip =~ /^::/) {
357        # This is ok
358    } elsif ($empty > 1) {
359        return;
360    }
361
362    push(@chunks, $ipv4) if $ipv4;
363    # Need 8 chunks, or we need an empty section that could be filled
364    # to represent the missing '0' sections.
365    return unless (@chunks == $expected_chunks || @chunks < $expected_chunks && $empty);
366
367    my $ipv6 = join(':', @chunks);
368    # The IP address is valid and can now be detainted.
369    trick_taint($ipv6);
370
371    # Need to handle the exception of trailing :: being valid.
372    return "${ipv6}::" if $ip =~ /::$/;
373    return $ipv6;
374}
375
376sub use_attachbase {
377    my $attachbase = Bugzilla->params->{'attachment_base'};
378    return ($attachbase ne ''
379            && $attachbase ne Bugzilla->params->{'urlbase'}
380            && $attachbase ne Bugzilla->params->{'sslbase'}) ? 1 : 0;
381}
382
383sub diff_arrays {
384    my ($old_ref, $new_ref, $attrib) = @_;
385    $attrib ||= 'name';
386
387    my (%counts, %pos);
388    # We are going to alter the old array.
389    my @old = @$old_ref;
390    my $i = 0;
391
392    # $counts{foo}-- means old, $counts{foo}++ means new.
393    # If $counts{foo} becomes positive, then we are adding new items,
394    # else we simply cancel one old existing item. Remaining items
395    # in the old list have been removed.
396    foreach (@old) {
397        next unless defined $_;
398        my $value = blessed($_) ? $_->$attrib : $_;
399        $counts{$value}--;
400        push @{$pos{$value}}, $i++;
401    }
402    my @added;
403    foreach (@$new_ref) {
404        next unless defined $_;
405        my $value = blessed($_) ? $_->$attrib : $_;
406        if (++$counts{$value} > 0) {
407            # Ignore empty strings, but objects having an empty string
408            # as attribute are fine.
409            push(@added, $_) unless ($value eq '' && !blessed($_));
410        }
411        else {
412            my $old_pos = shift @{$pos{$value}};
413            $old[$old_pos] = undef;
414        }
415    }
416    # Ignore canceled items as well as empty strings.
417    my @removed = grep { defined $_ && $_ ne '' } @old;
418    return (\@removed, \@added);
419}
420
421# XXX - This is a temporary subroutine till we require Perl 5.10.1.
422# This will happen before Bugzilla 5.0rc1.
423sub say (@) {
424    print @_;
425    print "\n";
426}
427
428sub trim {
429    my ($str) = @_;
430    if ($str) {
431      $str =~ s/^\s+//g;
432      $str =~ s/\s+$//g;
433    }
434    return $str;
435}
436
437sub wrap_comment {
438    my ($comment, $cols) = @_;
439    my $wrappedcomment = "";
440
441    # Use 'local', as recommended by Text::Wrap's perldoc.
442    local $Text::Wrap::columns = $cols || COMMENT_COLS;
443    # Make words that are longer than COMMENT_COLS not wrap.
444    local $Text::Wrap::huge    = 'overflow';
445    # Don't mess with tabs.
446    local $Text::Wrap::unexpand = 0;
447
448    # If the line starts with ">", don't wrap it. Otherwise, wrap.
449    foreach my $line (split(/\r\n|\r|\n/, $comment)) {
450      if ($line =~ qr/^>/) {
451        $wrappedcomment .= ($line . "\n");
452      }
453      else {
454        # Due to a segfault in Text::Tabs::expand() when processing tabs with
455        # Unicode (see http://rt.perl.org/rt3/Public/Bug/Display.html?id=52104),
456        # we have to remove tabs before processing the comment. This restriction
457        # can go away when we require Perl 5.8.9 or newer.
458        $line =~ s/\t/    /g;
459        $wrappedcomment .= (wrap('', '', $line) . "\n");
460      }
461    }
462
463    chomp($wrappedcomment); # Text::Wrap adds an extra newline at the end.
464    return $wrappedcomment;
465}
466
467sub find_wrap_point {
468    my ($string, $maxpos) = @_;
469    if (!$string) { return 0 }
470    if (length($string) < $maxpos) { return length($string) }
471    my $wrappoint = rindex($string, ",", $maxpos); # look for comma
472    if ($wrappoint <= 0) {  # can't find comma
473        $wrappoint = rindex($string, " ", $maxpos); # look for space
474        if ($wrappoint <= 0) {  # can't find space
475            $wrappoint = rindex($string, "-", $maxpos); # look for hyphen
476            if ($wrappoint <= 0) {  # can't find hyphen
477                $wrappoint = $maxpos;  # just truncate it
478            } else {
479                $wrappoint++; # leave hyphen on the left side
480            }
481        }
482    }
483    return $wrappoint;
484}
485
486sub join_activity_entries {
487    my ($field, $current_change, $new_change) = @_;
488    # We need to insert characters as these were removed by old
489    # LogActivityEntry code.
490
491    return $new_change if $current_change eq '';
492
493    # Buglists and see_also need the comma restored
494    if ($field eq 'dependson' || $field eq 'blocked' || $field eq 'see_also') {
495        if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') {
496            return $current_change . $new_change;
497        } else {
498            return $current_change . ', ' . $new_change;
499        }
500    }
501
502    # Assume bug_file_loc contain a single url, don't insert a delimiter
503    if ($field eq 'bug_file_loc') {
504        return $current_change . $new_change;
505    }
506
507    # All other fields get a space unless the first character of the second
508    # string is a comma or space
509    if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') {
510        return $current_change . $new_change;
511    } else {
512        return $current_change . ' ' . $new_change;
513    }
514}
515
516sub wrap_hard {
517    my ($string, $columns) = @_;
518    local $Text::Wrap::columns = $columns;
519    local $Text::Wrap::unexpand = 0;
520    local $Text::Wrap::huge = 'wrap';
521
522    my $wrapped = wrap('', '', $string);
523    chomp($wrapped);
524    return $wrapped;
525}
526
527sub format_time {
528    my ($date, $format, $timezone) = @_;
529
530    # If $format is not set, try to guess the correct date format.
531    if (!$format) {
532        if (!ref $date
533            && $date =~ /^(\d{4})[-\.](\d{2})[-\.](\d{2}) (\d{2}):(\d{2})(:(\d{2}))?$/)
534        {
535            my $sec = $7;
536            if (defined $sec) {
537                $format = "%Y-%m-%d %T %Z";
538            } else {
539                $format = "%Y-%m-%d %R %Z";
540            }
541        } else {
542            # Default date format. See DateTime for other formats available.
543            $format = "%Y-%m-%d %R %Z";
544        }
545    }
546
547    my $dt = ref $date ? $date : datetime_from($date, $timezone);
548    $date = defined $dt ? $dt->strftime($format) : '';
549    return trim($date);
550}
551
552sub datetime_from {
553    my ($date, $timezone) = @_;
554
555    # In the database, this is the "0" date.
556    return undef if $date =~ /^0000/;
557
558    # strptime($date) returns an empty array if $date has an invalid
559    # date format.
560    my @time = strptime($date);
561
562    unless (scalar @time) {
563        # If an unknown timezone is passed (such as MSK, for Moskow),
564        # strptime() is unable to parse the date. We try again, but we first
565        # remove the timezone.
566        $date =~ s/\s+\S+$//;
567        @time = strptime($date);
568    }
569
570    return undef if !@time;
571
572    # strptime() counts years from 1900, except if they are older than 1901
573    # in which case it returns the full year (so 1890 -> 1890, but 1984 -> 84,
574    # and 3790 -> 1890). We make a guess and assume that 1100 <= year < 3000.
575    $time[5] += 1900 if $time[5] < 1100;
576
577    my %args = (
578        year   => $time[5],
579        # Months start from 0 (January).
580        month  => $time[4] + 1,
581        day    => $time[3],
582        hour   => $time[2],
583        minute => $time[1],
584        # DateTime doesn't like fractional seconds.
585        # Also, sometimes seconds are undef.
586        second => defined($time[0]) ? int($time[0]) : undef,
587        # If a timezone was specified, use it. Otherwise, use the
588        # local timezone.
589        time_zone => Bugzilla->local_timezone->offset_as_string($time[6])
590                     || Bugzilla->local_timezone,
591    );
592
593    # If something wasn't specified in the date, it's best to just not
594    # pass it to DateTime at all. (This is important for doing datetime_from
595    # on the deadline field, which is usually just a date with no time.)
596    foreach my $arg (keys %args) {
597        delete $args{$arg} if !defined $args{$arg};
598    }
599
600    # This module takes time to load and is only used here, so we
601    # |require| it here rather than |use| it.
602    require DateTime;
603    my $dt = new DateTime(\%args);
604
605    # Now display the date using the given timezone,
606    # or the user's timezone if none is given.
607    $dt->set_time_zone($timezone || Bugzilla->user->timezone);
608    return $dt;
609}
610
611sub bz_crypt {
612    my ($password, $salt) = @_;
613
614    my $algorithm;
615    if (!defined $salt) {
616        # If you don't use a salt, then people can create tables of
617        # hashes that map to particular passwords, and then break your
618        # hashing very easily if they have a large-enough table of common
619        # (or even uncommon) passwords. So we generate a unique salt for
620        # each password in the database, and then just prepend it to
621        # the hash.
622        $salt = generate_random_password(PASSWORD_SALT_LENGTH);
623        $algorithm = PASSWORD_DIGEST_ALGORITHM;
624    }
625
626    # We append the algorithm used to the string. This is good because then
627    # we can change the algorithm being used, in the future, without
628    # disrupting the validation of existing passwords. Also, this tells
629    # us if a password is using the old "crypt" method of hashing passwords,
630    # because the algorithm will be missing from the string.
631    if ($salt =~ /{([^}]+)}$/) {
632        $algorithm = $1;
633    }
634
635    # Wide characters cause crypt and Digest to die.
636    if (Bugzilla->params->{'utf8'}) {
637        utf8::encode($password) if utf8::is_utf8($password);
638    }
639
640    my $crypted_password;
641    if (!$algorithm) {
642        # Crypt the password.
643        $crypted_password = crypt($password, $salt);
644
645        # HACK: Perl has bug where returned crypted password is considered
646        # tainted. See http://rt.perl.org/rt3/Public/Bug/Display.html?id=59998
647        unless(tainted($password) || tainted($salt)) {
648            trick_taint($crypted_password);
649        }
650    }
651    else {
652        my $hasher = Digest->new($algorithm);
653        # We only want to use the first characters of the salt, no
654        # matter how long of a salt we may have been passed.
655        $salt = substr($salt, 0, PASSWORD_SALT_LENGTH);
656        $hasher->add($password, $salt);
657        $crypted_password = $salt . $hasher->b64digest . "{$algorithm}";
658    }
659
660    # Return the crypted password.
661    return $crypted_password;
662}
663
664# If you want to understand the security of strings generated by this
665# function, here's a quick formula that will help you estimate:
666# We pick from 62 characters, which is close to 64, which is 2^6.
667# So 8 characters is (2^6)^8 == 2^48 combinations. Just multiply 6
668# by the number of characters you generate, and that gets you the equivalent
669# strength of the string in bits.
670sub generate_random_password {
671    my $size = shift || 10; # default to 10 chars if nothing specified
672    return join("", map{ ('0'..'9','a'..'z','A'..'Z')[irand 62] } (1..$size));
673}
674
675sub validate_email_syntax {
676    my ($addr) = @_;
677    my $match = Bugzilla->params->{'emailregexp'};
678    my $email = $addr . Bugzilla->params->{'emailsuffix'};
679    # This regexp follows RFC 2822 section 3.4.1.
680    my $addr_spec = $Email::Address::addr_spec;
681    # RFC 2822 section 2.1 specifies that email addresses must
682    # be made of US-ASCII characters only.
683    # Email::Address::addr_spec doesn't enforce this.
684    # We set the max length to 127 to ensure addresses aren't truncated when
685    # inserted into the tokens.eventdata field.
686    if ($addr =~ /$match/
687        && $email !~ /\P{ASCII}/
688        && $email =~ /^$addr_spec$/
689        && length($email) <= 127)
690    {
691        # We assume these checks to suffice to consider the address untainted.
692        trick_taint($_[0]);
693        return 1;
694    }
695    return 0;
696}
697
698sub check_email_syntax {
699    my ($addr) = @_;
700
701    unless (validate_email_syntax(@_)) {
702        my $email = $addr . Bugzilla->params->{'emailsuffix'};
703        ThrowUserError('illegal_email_address', { addr => $email });
704    }
705}
706
707sub validate_date {
708    my ($date) = @_;
709    my $date2;
710
711    # $ts is undefined if the parser fails.
712    my $ts = str2time($date);
713    if ($ts) {
714        $date2 = time2str("%Y-%m-%d", $ts);
715
716        $date =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/;
717        $date2 =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/;
718    }
719    my $ret = ($ts && $date eq $date2);
720    return $ret ? 1 : 0;
721}
722
723sub validate_time {
724    my ($time) = @_;
725    my $time2;
726
727    # $ts is undefined if the parser fails.
728    my $ts = str2time($time);
729    if ($ts) {
730        $time2 = time2str("%H:%M:%S", $ts);
731        if ($time =~ /^(\d{1,2}):(\d\d)(?::(\d\d))?$/) {
732            $time = sprintf("%02d:%02d:%02d", $1, $2, $3 || 0);
733        }
734    }
735    my $ret = ($ts && $time eq $time2);
736    return $ret ? 1 : 0;
737}
738
739sub is_7bit_clean {
740    return $_[0] !~ /[^\x20-\x7E\x0A\x0D]/;
741}
742
743sub clean_text {
744    my $dtext = shift;
745    if ($dtext) {
746        # change control characters into a space
747        $dtext =~ s/[\x00-\x1F\x7F]+/ /g;
748    }
749    return trim($dtext);
750}
751
752sub on_main_db (&) {
753    my $code = shift;
754    my $original_dbh = Bugzilla->dbh;
755    Bugzilla->request_cache->{dbh} = Bugzilla->dbh_main;
756    $code->();
757    Bugzilla->request_cache->{dbh} = $original_dbh;
758}
759
760sub get_text {
761    my ($name, $vars) = @_;
762    my $template = Bugzilla->template_inner;
763    $vars ||= {};
764    $vars->{'message'} = $name;
765    my $message;
766    $template->process('global/message.txt.tmpl', $vars, \$message)
767      || ThrowTemplateError($template->error());
768
769    # Remove the indenting that exists in messages.html.tmpl.
770    $message =~ s/^    //gm;
771    return $message;
772}
773
774sub template_var {
775    my $name = shift;
776    my $cache = Bugzilla->request_cache->{util_template_var} ||= {};
777    my $template = Bugzilla->template_inner;
778    my $lang = $template->context->{bz_language};
779    return $cache->{$lang}->{$name} if defined $cache->{$lang};
780    my %vars;
781    # Note: If we suddenly start needing a lot of template_var variables,
782    # they should move into their own template, not field-descs.
783    $template->process('global/field-descs.none.tmpl',
784                       { vars => \%vars, in_template_var => 1 })
785      || ThrowTemplateError($template->error());
786
787    $cache->{$lang} = \%vars;
788    return $vars{$name};
789}
790
791sub display_value {
792    my ($field, $value) = @_;
793    my $value_descs = template_var('value_descs');
794    if (defined $value_descs->{$field}->{$value}) {
795        return $value_descs->{$field}->{$value};
796    }
797    return $value;
798}
799
800sub disable_utf8 {
801    if (Bugzilla->params->{'utf8'}) {
802        binmode STDOUT, ':bytes'; # Turn off UTF8 encoding.
803    }
804}
805
806use constant UTF8_ACCIDENTAL => qw(shiftjis big5-eten euc-kr euc-jp);
807
808sub detect_encoding {
809    my $data = shift;
810
811    Bugzilla->feature('detect_charset')
812      || ThrowUserError('feature_disabled', { feature => 'detect_charset' });
813
814    require Encode::Detect::Detector;
815    import Encode::Detect::Detector 'detect';
816
817    my $encoding = detect($data);
818    $encoding = resolve_alias($encoding) if $encoding;
819
820    # Encode::Detect is bad at detecting certain charsets, but Encode::Guess
821    # is better at them. Here's the details:
822
823    # shiftjis, big5-eten, euc-kr, and euc-jp: (Encode::Detect
824    # tends to accidentally mis-detect UTF-8 strings as being
825    # these encodings.)
826    if ($encoding && grep($_ eq $encoding, UTF8_ACCIDENTAL)) {
827        $encoding = undef;
828        my $decoder = guess_encoding($data, UTF8_ACCIDENTAL);
829        $encoding = $decoder->name if ref $decoder;
830    }
831
832    # Encode::Detect sometimes mis-detects various ISO encodings as iso-8859-8,
833    # or cp1255, but Encode::Guess can usually tell which one it is.
834    if ($encoding && ($encoding eq 'iso-8859-8' || $encoding eq 'cp1255')) {
835        my $decoded_as = _guess_iso($data, 'iso-8859-8',
836            # These are ordered this way because it gives the most
837            # accurate results.
838            qw(cp1252 iso-8859-7 iso-8859-2));
839        $encoding = $decoded_as if $decoded_as;
840    }
841
842    return $encoding;
843}
844
845# A helper for detect_encoding.
846sub _guess_iso {
847    my ($data, $versus, @isos) = (shift, shift, shift);
848
849    my $encoding;
850    foreach my $iso (@isos) {
851        my $decoder = guess_encoding($data, ($iso, $versus));
852        if (ref $decoder) {
853            $encoding = $decoder->name if ref $decoder;
854            last;
855        }
856    }
857    return $encoding;
858}
859
8601;
861
862__END__
863
864=head1 NAME
865
866Bugzilla::Util - Generic utility functions for bugzilla
867
868=head1 SYNOPSIS
869
870  use Bugzilla::Util;
871
872  # Functions for dealing with variable tainting
873  trick_taint($var);
874  detaint_natural($var);
875  detaint_signed($var);
876
877  # Functions for quoting
878  html_quote($var);
879  url_quote($var);
880  xml_quote($var);
881  email_filter($var);
882
883  # Functions that tell you about your environment
884  my $is_cgi   = i_am_cgi();
885  my $is_webservice = i_am_webservice();
886  my $urlbase  = correct_urlbase();
887
888  # Data manipulation
889  ($removed, $added) = diff_arrays(\@old, \@new);
890
891  # Functions for manipulating strings
892  $val = trim(" abc ");
893  $wrapped = wrap_comment($comment);
894
895  # Functions for formatting time
896  format_time($time);
897  datetime_from($time, $timezone);
898
899  # Cryptographic Functions
900  $crypted_password = bz_crypt($password);
901  $new_password = generate_random_password($password_length);
902
903  # Validation Functions
904  validate_email_syntax($email);
905  check_email_syntax($email);
906  validate_date($date);
907
908  # DB-related functions
909  on_main_db {
910     ... code here ...
911  };
912
913=head1 DESCRIPTION
914
915This package contains various utility functions which do not belong anywhere
916else.
917
918B<It is not intended as a general dumping group for something which
919people feel might be useful somewhere, someday>. Do not add methods to this
920package unless it is intended to be used for a significant number of files,
921and it does not belong anywhere else.
922
923=head1 FUNCTIONS
924
925This package provides several types of routines:
926
927=head2 Tainting
928
929Several functions are available to deal with tainted variables. B<Use these
930with care> to avoid security holes.
931
932=over 4
933
934=item C<trick_taint($val)>
935
936Tricks perl into untainting a particular variable.
937
938Use trick_taint() when you know that there is no way that the data
939in a scalar can be tainted, but taint mode still bails on it.
940
941B<WARNING!! Using this routine on data that really could be tainted defeats
942the purpose of taint mode.  It should only be used on variables that have been
943sanity checked in some way and have been determined to be OK.>
944
945=item C<detaint_natural($num)>
946
947This routine detaints a natural number. It returns a true value if the
948value passed in was a valid natural number, else it returns false. You
949B<MUST> check the result of this routine to avoid security holes.
950
951=item C<detaint_signed($num)>
952
953This routine detaints a signed integer. It returns a true value if the
954value passed in was a valid signed integer, else it returns false. You
955B<MUST> check the result of this routine to avoid security holes.
956
957=back
958
959=head2 Quoting
960
961Some values may need to be quoted from perl. However, this should in general
962be done in the template where possible.
963
964=over 4
965
966=item C<html_quote($val)>
967
968Returns a value quoted for use in HTML, with &, E<lt>, E<gt>, E<34> and @ being
969replaced with their appropriate HTML entities.  Also, Unicode BiDi controls are
970deleted.
971
972=item C<html_light_quote($val)>
973
974Returns a string where only explicitly allowed HTML elements and attributes
975are kept. All HTML elements and attributes not being in the whitelist are either
976escaped (if HTML::Scrubber is not installed) or removed.
977
978=item C<url_quote($val)>
979
980Quotes characters so that they may be included as part of a url.
981
982=item C<css_class_quote($val)>
983
984Quotes characters so that they may be used as CSS class names. Spaces
985and forward slashes are replaced by underscores.
986
987=item C<xml_quote($val)>
988
989This is similar to C<html_quote>, except that ' is escaped to &apos;. This
990is kept separate from html_quote partly for compatibility with previous code
991(for &apos;) and partly for future handling of non-ASCII characters.
992
993=item C<email_filter>
994
995Removes the hostname from email addresses in the string, if the user
996currently viewing Bugzilla is logged out. If the user is logged-in,
997this filter just returns the input string.
998
999=back
1000
1001=head2 Environment and Location
1002
1003Functions returning information about your environment or location.
1004
1005=over 4
1006
1007=item C<i_am_cgi()>
1008
1009Tells you whether or not you are being run as a CGI script in a web
1010server. For example, it would return false if the caller is running
1011in a command-line script.
1012
1013=item C<i_am_webservice()>
1014
1015Tells you whether or not the current usage mode is WebServices related
1016such as JSONRPC or XMLRPC.
1017
1018=item C<correct_urlbase()>
1019
1020Returns either the C<sslbase> or C<urlbase> parameter, depending on the
1021current setting for the C<ssl_redirect> parameter.
1022
1023=item C<remote_ip()>
1024
1025Returns the IP address of the remote client. If Bugzilla is behind
1026a trusted proxy, it will get the remote IP address by looking at the
1027X-Forwarded-For header.
1028
1029=item C<validate_ip($ip)>
1030
1031Returns the sanitized IP address if it is a valid IPv4 or IPv6 address,
1032else returns undef.
1033
1034=item C<use_attachbase()>
1035
1036Returns true if an alternate host is used to display attachments; false
1037otherwise.
1038
1039=back
1040
1041=head2 Data Manipulation
1042
1043=over 4
1044
1045=item C<diff_arrays(\@old, \@new)>
1046
1047 Description: Takes two arrayrefs, and will tell you what it takes to
1048              get from @old to @new.
1049 Params:      @old = array that you are changing from
1050              @new = array that you are changing to
1051 Returns:     A list of two arrayrefs. The first is a reference to an
1052              array containing items that were removed from @old. The
1053              second is a reference to an array containing items
1054              that were added to @old. If both returned arrays are
1055              empty, @old and @new contain the same values.
1056
1057=back
1058
1059=head2 String Manipulation
1060
1061=over 4
1062
1063=item C<trim($str)>
1064
1065Removes any leading or trailing whitespace from a string. This routine does not
1066modify the existing string.
1067
1068=item C<wrap_hard($string, $size)>
1069
1070Wraps a string, so that a line is I<never> longer than C<$size>.
1071Returns the string, wrapped.
1072
1073=item C<wrap_comment($comment)>
1074
1075Takes a bug comment, and wraps it to the appropriate length. The length is
1076currently specified in C<Bugzilla::Constants::COMMENT_COLS>. Lines beginning
1077with ">" are assumed to be quotes, and they will not be wrapped.
1078
1079The intended use of this function is to wrap comments that are about to be
1080displayed or emailed. Generally, wrapped text should not be stored in the
1081database.
1082
1083=item C<find_wrap_point($string, $maxpos)>
1084
1085Search for a comma, a whitespace or a hyphen to split $string, within the first
1086$maxpos characters. If none of them is found, just split $string at $maxpos.
1087The search starts at $maxpos and goes back to the beginning of the string.
1088
1089=item C<join_activity_entries($field, $current_change, $new_change)>
1090
1091Joins two strings together so they appear as one. The field name is specified
1092as the method of joining the two strings depends on this. Returns the
1093combined string.
1094
1095=item C<is_7bit_clean($str)>
1096
1097Returns true is the string contains only 7-bit characters (ASCII 32 through 126,
1098ASCII 10 (LineFeed) and ASCII 13 (Carrage Return).
1099
1100=item C<disable_utf8()>
1101
1102Disable utf8 on STDOUT (and display raw data instead).
1103
1104=item C<detect_encoding($str)>
1105
1106Guesses what encoding a given data is encoded in, returning the canonical name
1107of the detected encoding (which may be different from the MIME charset
1108specification).
1109
1110=item C<clean_text($str)>
1111Returns the parameter "cleaned" by exchanging non-printable characters with spaces.
1112Specifically characters (ASCII 0 through 31) and (ASCII 127) will become ASCII 32 (Space).
1113
1114=item C<get_text>
1115
1116=over
1117
1118=item B<Description>
1119
1120This is a method of getting localized strings within Bugzilla code.
1121Use this when you don't want to display a whole template, you just
1122want a particular string.
1123
1124It uses the F<global/message.txt.tmpl> template to return a string.
1125
1126=item B<Params>
1127
1128=over
1129
1130=item C<$message> - The identifier for the message.
1131
1132=item C<$vars> - A hashref. Any variables you want to pass to the template.
1133
1134=back
1135
1136=item B<Returns>
1137
1138A string.
1139
1140=back
1141
1142
1143=item C<template_var>
1144
1145This is a method of getting the value of a variable from a template in
1146Perl code. The available variables are in the C<global/field-descs.none.tmpl>
1147template. Just pass in the name of the variable that you want the value of.
1148
1149
1150=back
1151
1152=head2 Formatting Time
1153
1154=over 4
1155
1156=item C<format_time($time)>
1157
1158Takes a time and converts it to the desired format and timezone.
1159If no format is given, the routine guesses the correct one and returns
1160an empty array if it cannot. If no timezone is given, the user's timezone
1161is used, as defined in his preferences.
1162
1163This routine is mainly called from templates to filter dates, see
1164"FILTER time" in L<Bugzilla::Template>.
1165
1166=item C<datetime_from($time, $timezone)>
1167
1168Returns a DateTime object given a date string. If the string is not in some
1169valid date format that C<strptime> understands, we return C<undef>.
1170
1171You can optionally specify a timezone for the returned date. If not
1172specified, defaults to the currently-logged-in user's timezone, or
1173the Bugzilla server's local timezone if there isn't a logged-in user.
1174
1175=back
1176
1177=head2 Cryptography
1178
1179=over 4
1180
1181=item C<bz_crypt($password, $salt)>
1182
1183Takes a string and returns a hashed (encrypted) value for it, using a
1184random salt. An optional salt string may also be passed in.
1185
1186Please always use this function instead of the built-in perl C<crypt>
1187function, when checking or setting a password. Bugzilla does not use
1188C<crypt>.
1189
1190=begin undocumented
1191
1192Random salts are generated because the alternative is usually
1193to use the first two characters of the password itself, and since
1194the salt appears in plaintext at the beginning of the encrypted
1195password string this has the effect of revealing the first two
1196characters of the password to anyone who views the encrypted version.
1197
1198=end undocumented
1199
1200=item C<generate_random_password($password_length)>
1201
1202Returns an alphanumeric string with the specified length
1203(10 characters by default). Use this function to generate passwords
1204and tokens.
1205
1206=back
1207
1208=head2 Validation
1209
1210=over 4
1211
1212=item C<validate_email_syntax($email)>
1213
1214Do a syntax checking for a legal email address and returns 1 if
1215the check is successful, else returns 0.
1216Untaints C<$email> if successful.
1217
1218=item C<check_email_syntax($email)>
1219
1220Do a syntax checking for a legal email address and throws an error
1221if the check fails.
1222Untaints C<$email> if successful.
1223
1224=item C<validate_date($date)>
1225
1226Make sure the date has the correct format and returns 1 if
1227the check is successful, else returns 0.
1228
1229=back
1230
1231=head2 Database
1232
1233=over
1234
1235=item C<on_main_db>
1236
1237Runs a block of code always on the main DB. Useful for when you're inside
1238a subroutine and need to do some writes to the database, but don't know
1239if Bugzilla is currently using the shadowdb or not. Used like:
1240
1241 on_main_db {
1242     my $dbh = Bugzilla->dbh;
1243     $dbh->do("INSERT ...");
1244 }
1245
1246=back
1247