1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4#
5# This Source Code Form is "Incompatible With Secondary Licenses", as
6# defined by the Mozilla Public License, v. 2.0.
7
8package Bugzilla::Util;
9
10use 5.10.1;
11use strict;
12use warnings;
13
14use parent qw(Exporter);
15@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed
16                             html_quote url_quote xml_quote
17                             css_class_quote html_light_quote
18                             i_am_cgi i_am_webservice correct_urlbase remote_ip
19                             validate_ip do_ssl_redirect_if_required use_attachbase
20                             diff_arrays on_main_db
21                             trim wrap_hard wrap_comment find_wrap_point
22                             format_time validate_date validate_time datetime_from
23                             is_7bit_clean bz_crypt generate_random_password
24                             validate_email_syntax check_email_syntax clean_text
25                             get_text template_var display_value disable_utf8
26                             detect_encoding email_filter
27                             join_activity_entries read_text write_text);
28
29use Bugzilla::Constants;
30use Bugzilla::RNG qw(irand);
31use Bugzilla::Error;
32
33use Date::Parse;
34use Date::Format;
35use Digest;
36use Email::Address;
37use List::Util qw(first);
38use Scalar::Util qw(tainted blessed);
39use Text::Wrap;
40use Encode qw(encode decode resolve_alias);
41use Encode::Guess;
42use File::Basename qw(dirname);
43use File::Temp qw(tempfile);
44
45sub trick_taint {
46    require Carp;
47    Carp::confess("Undef to trick_taint") unless defined $_[0];
48    my $match = $_[0] =~ /^(.*)$/s;
49    $_[0] = $match ? $1 : undef;
50    return (defined($_[0]));
51}
52
53sub detaint_natural {
54    my $match = $_[0] =~ /^([0-9]+)$/;
55    $_[0] = $match ? int($1) : undef;
56    return (defined($_[0]));
57}
58
59sub detaint_signed {
60    my $match = $_[0] =~ /^([-+]?[0-9]+)$/;
61    # The "int()" call removes any leading plus sign.
62    $_[0] = $match ? int($1) : undef;
63    return (defined($_[0]));
64}
65
66# Bug 120030: Override html filter to obscure the '@' in user
67#             visible strings.
68# Bug 319331: Handle BiDi disruptions.
69sub html_quote {
70    my $var = shift;
71    $var =~ s/&/&/g;
72    $var =~ s/</&lt;/g;
73    $var =~ s/>/&gt;/g;
74    $var =~ s/"/&quot;/g;
75    # Obscure '@'.
76    $var =~ s/\@/\&#64;/g;
77
78    state $use_utf8 = Bugzilla->params->{'utf8'};
79
80    if ($use_utf8) {
81        # Remove control characters if the encoding is utf8.
82        # Other multibyte encodings may be using this range; so ignore if not utf8.
83        $var =~ s/(?![\t\r\n])[[:cntrl:]]//g;
84
85        # Remove the following characters because they're
86        # influencing BiDi:
87        # --------------------------------------------------------
88        # |Code  |Name                      |UTF-8 representation|
89        # |------|--------------------------|--------------------|
90        # |U+202a|Left-To-Right Embedding   |0xe2 0x80 0xaa      |
91        # |U+202b|Right-To-Left Embedding   |0xe2 0x80 0xab      |
92        # |U+202c|Pop Directional Formatting|0xe2 0x80 0xac      |
93        # |U+202d|Left-To-Right Override    |0xe2 0x80 0xad      |
94        # |U+202e|Right-To-Left Override    |0xe2 0x80 0xae      |
95        # --------------------------------------------------------
96        #
97        # The following are characters influencing BiDi, too, but
98        # they can be spared from filtering because they don't
99        # influence more than one character right or left:
100        # --------------------------------------------------------
101        # |Code  |Name                      |UTF-8 representation|
102        # |------|--------------------------|--------------------|
103        # |U+200e|Left-To-Right Mark        |0xe2 0x80 0x8e      |
104        # |U+200f|Right-To-Left Mark        |0xe2 0x80 0x8f      |
105        # --------------------------------------------------------
106        $var =~ tr/\x{202a}-\x{202e}//d;
107    }
108    return $var;
109}
110
111sub read_text {
112    my ($filename) = @_;
113    open my $fh, '<:encoding(utf-8)', $filename;
114    local $/ = undef;
115    my $content = <$fh>;
116    close $fh;
117    return $content;
118}
119
120sub write_text {
121    my ($filename, $content) = @_;
122    my ($tmp_fh, $tmp_filename) = tempfile('.tmp.XXXXXXXXXX',
123        DIR    => dirname($filename),
124        UNLINK => 0,
125    );
126    binmode $tmp_fh, ':encoding(utf-8)';
127    print $tmp_fh $content;
128    close $tmp_fh;
129    # File::Temp tries for secure files, but File::Slurp used the umask.
130    chmod(0666 & ~umask, $tmp_filename);
131    rename $tmp_filename, $filename;
132}
133
134sub html_light_quote {
135    my ($text) = @_;
136    # admin/table.html.tmpl calls |FILTER html_light| many times.
137    # There is no need to recreate the HTML::Scrubber object again and again.
138    my $scrubber = Bugzilla->process_cache->{html_scrubber};
139
140    # List of allowed HTML elements having no attributes.
141    my @allow = qw(b strong em i u p br abbr acronym ins del cite code var
142                   dfn samp kbd big small sub sup tt dd dt dl ul li ol
143                   fieldset legend);
144
145    if (!Bugzilla->feature('html_desc')) {
146        my $safe = join('|', @allow);
147        my $chr = chr(1);
148
149        # First, escape safe elements.
150        $text =~ s#<($safe)>#$chr$1$chr#go;
151        $text =~ s#</($safe)>#$chr/$1$chr#go;
152        # Now filter < and >.
153        $text =~ s#<#&lt;#g;
154        $text =~ s#>#&gt;#g;
155        # Restore safe elements.
156        $text =~ s#$chr/($safe)$chr#</$1>#go;
157        $text =~ s#$chr($safe)$chr#<$1>#go;
158        return $text;
159    }
160    elsif (!$scrubber) {
161        # We can be less restrictive. We can accept elements with attributes.
162        push(@allow, qw(a blockquote q span));
163
164        # Allowed protocols.
165        my $safe_protocols = join('|', SAFE_PROTOCOLS);
166        my $protocol_regexp = qr{(^(?:$safe_protocols):|^[^:]+$)}i;
167
168        # Deny all elements and attributes unless explicitly authorized.
169        my @default = (0 => {
170                             id    => 1,
171                             name  => 1,
172                             class => 1,
173                             '*'   => 0, # Reject all other attributes.
174                            }
175                       );
176
177        # Specific rules for allowed elements. If no specific rule is set
178        # for a given element, then the default is used.
179        my @rules = (a => {
180                           href   => $protocol_regexp,
181                           target => qr{^(?:_blank|_parent|_self|_top)$}i,
182                           title  => 1,
183                           id     => 1,
184                           name   => 1,
185                           class  => 1,
186                           '*'    => 0, # Reject all other attributes.
187                          },
188                     blockquote => {
189                                    cite => $protocol_regexp,
190                                    id    => 1,
191                                    name  => 1,
192                                    class => 1,
193                                    '*'  => 0, # Reject all other attributes.
194                                   },
195                     'q' => {
196                             cite => $protocol_regexp,
197                             id    => 1,
198                             name  => 1,
199                             class => 1,
200                             '*'  => 0, # Reject all other attributes.
201                          },
202                    );
203
204        Bugzilla->process_cache->{html_scrubber} = $scrubber =
205          HTML::Scrubber->new(default => \@default,
206                              allow   => \@allow,
207                              rules   => \@rules,
208                              comment => 0,
209                              process => 0);
210    }
211    return $scrubber->scrub($text);
212}
213
214sub email_filter {
215    my ($toencode) = @_;
216    if (!Bugzilla->user->id) {
217        my @emails = Email::Address->parse($toencode);
218        if (scalar @emails) {
219            my @hosts = map { quotemeta($_->host) } @emails;
220            my $hosts_re = join('|', @hosts);
221            $toencode =~ s/\@(?:$hosts_re)//g;
222            return $toencode;
223        }
224    }
225    return $toencode;
226}
227
228# This originally came from CGI.pm, by Lincoln D. Stein
229sub url_quote {
230    my ($toencode) = (@_);
231    utf8::encode($toencode) # The below regex works only on bytes
232        if Bugzilla->params->{'utf8'} && utf8::is_utf8($toencode);
233    $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
234    return $toencode;
235}
236
237sub css_class_quote {
238    my ($toencode) = (@_);
239    $toencode =~ s#[ /]#_#g;
240    $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("&#x%x;",ord($1))/eg;
241    return $toencode;
242}
243
244sub xml_quote {
245    my ($var) = (@_);
246    $var =~ s/\&/\&amp;/g;
247    $var =~ s/</\&lt;/g;
248    $var =~ s/>/\&gt;/g;
249    $var =~ s/\"/\&quot;/g;
250    $var =~ s/\'/\&apos;/g;
251
252    # the following nukes characters disallowed by the XML 1.0
253    # spec, Production 2.2. 1.0 declares that only the following
254    # are valid:
255    # (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF])
256    $var =~ s/([\x{0001}-\x{0008}]|
257               [\x{000B}-\x{000C}]|
258               [\x{000E}-\x{001F}]|
259               [\x{D800}-\x{DFFF}]|
260               [\x{FFFE}-\x{FFFF}])//gx;
261    return $var;
262}
263
264sub i_am_cgi {
265    # I use SERVER_SOFTWARE because it's required to be
266    # defined for all requests in the CGI spec.
267    return exists $ENV{'SERVER_SOFTWARE'} ? 1 : 0;
268}
269
270sub i_am_webservice {
271    my $usage_mode = Bugzilla->usage_mode;
272    return $usage_mode == USAGE_MODE_XMLRPC
273           || $usage_mode == USAGE_MODE_JSON
274           || $usage_mode == USAGE_MODE_REST;
275}
276
277# This exists as a separate function from Bugzilla::CGI::redirect_to_https
278# because we don't want to create a CGI object during XML-RPC calls
279# (doing so can mess up XML-RPC).
280sub do_ssl_redirect_if_required {
281    return if !i_am_cgi();
282    return if !Bugzilla->params->{'ssl_redirect'};
283
284    my $sslbase = Bugzilla->params->{'sslbase'};
285
286    # If we're already running under SSL, never redirect.
287    return if uc($ENV{HTTPS} || '') eq 'ON';
288    # Never redirect if there isn't an sslbase.
289    return if !$sslbase;
290    Bugzilla->cgi->redirect_to_https();
291}
292
293sub correct_urlbase {
294    my $ssl = Bugzilla->params->{'ssl_redirect'};
295    my $urlbase = Bugzilla->params->{'urlbase'};
296    my $sslbase = Bugzilla->params->{'sslbase'};
297
298    if (!$sslbase) {
299        return $urlbase;
300    }
301    elsif ($ssl) {
302        return $sslbase;
303    }
304    else {
305        # Return what the user currently uses.
306        return (uc($ENV{HTTPS} || '') eq 'ON') ? $sslbase : $urlbase;
307    }
308}
309
310sub remote_ip {
311    my $ip = $ENV{'REMOTE_ADDR'} || '127.0.0.1';
312    my @proxies = split(/[\s,]+/, Bugzilla->params->{'inbound_proxies'});
313
314    # If the IP address is one of our trusted proxies, then we look at
315    # the X-Forwarded-For header to determine the real remote IP address.
316    if ($ENV{'HTTP_X_FORWARDED_FOR'} && first { $_ eq $ip } @proxies) {
317        my @ips = split(/[\s,]+/, $ENV{'HTTP_X_FORWARDED_FOR'});
318        # This header can contain several IP addresses. We want the
319        # IP address of the machine which connected to our proxies as
320        # all other IP addresses may be fake or internal ones.
321        # Note that this may block a whole external proxy, but we have
322        # no way to determine if this proxy is malicious or trustable.
323        foreach my $remote_ip (reverse @ips) {
324            if (!first { $_ eq $remote_ip } @proxies) {
325                # Keep the original IP address if the remote IP is invalid.
326                $ip = validate_ip($remote_ip) || $ip;
327                last;
328            }
329        }
330    }
331    return $ip;
332}
333
334sub validate_ip {
335    my $ip = shift;
336    return is_ipv4($ip) || is_ipv6($ip);
337}
338
339# Copied from Data::Validate::IP::is_ipv4().
340sub is_ipv4 {
341    my $ip = shift;
342    return unless defined $ip;
343
344    my @octets = $ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/;
345    return unless scalar(@octets) == 4;
346
347    foreach my $octet (@octets) {
348        return unless ($octet >= 0 && $octet <= 255 && $octet !~ /^0\d{1,2}$/);
349    }
350
351    # The IP address is valid and can now be detainted.
352    return join('.', @octets);
353}
354
355# Copied from Data::Validate::IP::is_ipv6().
356sub is_ipv6 {
357    my $ip = shift;
358    return unless defined $ip;
359
360    # If there is a :: then there must be only one :: and the length
361    # can be variable. Without it, the length must be 8 groups.
362    my @chunks = split(':', $ip);
363
364    # Need to check if the last chunk is an IPv4 address, if it is we
365    # pop it off and exempt it from the normal IPv6 checking and stick
366    # it back on at the end. If there is only one chunk and it's an IPv4
367    # address, then it isn't an IPv6 address.
368    my $ipv4;
369    my $expected_chunks = 8;
370    if (@chunks > 1 && is_ipv4($chunks[$#chunks])) {
371        $ipv4 = pop(@chunks);
372        $expected_chunks--;
373    }
374
375    my $empty = 0;
376    # Workaround to handle trailing :: being valid.
377    if ($ip =~ /[0-9a-f]{1,4}::$/) {
378        $empty++;
379    # Single trailing ':' is invalid.
380    } elsif ($ip =~ /:$/) {
381        return;
382    }
383
384    foreach my $chunk (@chunks) {
385        return unless $chunk =~ /^[0-9a-f]{0,4}$/i;
386        $empty++ if $chunk eq '';
387    }
388    # More than one :: block is bad, but if it starts with :: it will
389    # look like two, so we need an exception.
390    if ($empty == 2 && $ip =~ /^::/) {
391        # This is ok
392    } elsif ($empty > 1) {
393        return;
394    }
395
396    push(@chunks, $ipv4) if $ipv4;
397    # Need 8 chunks, or we need an empty section that could be filled
398    # to represent the missing '0' sections.
399    return unless (@chunks == $expected_chunks || @chunks < $expected_chunks && $empty);
400
401    my $ipv6 = join(':', @chunks);
402    # The IP address is valid and can now be detainted.
403    trick_taint($ipv6);
404
405    # Need to handle the exception of trailing :: being valid.
406    return "${ipv6}::" if $ip =~ /::$/;
407    return $ipv6;
408}
409
410sub use_attachbase {
411    my $attachbase = Bugzilla->params->{'attachment_base'};
412    return ($attachbase ne ''
413            && $attachbase ne Bugzilla->params->{'urlbase'}
414            && $attachbase ne Bugzilla->params->{'sslbase'}) ? 1 : 0;
415}
416
417sub diff_arrays {
418    my ($old_ref, $new_ref, $attrib) = @_;
419    $attrib ||= 'name';
420
421    my (%counts, %pos);
422    # We are going to alter the old array.
423    my @old = @$old_ref;
424    my $i = 0;
425
426    # $counts{foo}-- means old, $counts{foo}++ means new.
427    # If $counts{foo} becomes positive, then we are adding new items,
428    # else we simply cancel one old existing item. Remaining items
429    # in the old list have been removed.
430    foreach (@old) {
431        next unless defined $_;
432        my $value = blessed($_) ? $_->$attrib : $_;
433        $counts{$value}--;
434        push @{$pos{$value}}, $i++;
435    }
436    my @added;
437    foreach (@$new_ref) {
438        next unless defined $_;
439        my $value = blessed($_) ? $_->$attrib : $_;
440        if (++$counts{$value} > 0) {
441            # Ignore empty strings, but objects having an empty string
442            # as attribute are fine.
443            push(@added, $_) unless ($value eq '' && !blessed($_));
444        }
445        else {
446            my $old_pos = shift @{$pos{$value}};
447            $old[$old_pos] = undef;
448        }
449    }
450    # Ignore canceled items as well as empty strings.
451    my @removed = grep { defined $_ && $_ ne '' } @old;
452    return (\@removed, \@added);
453}
454
455sub trim {
456    my ($str) = @_;
457    if ($str) {
458      $str =~ s/^\s+//g;
459      $str =~ s/\s+$//g;
460    }
461    return $str;
462}
463
464sub wrap_comment {
465    my ($comment, $cols) = @_;
466    my $wrappedcomment = "";
467
468    # Use 'local', as recommended by Text::Wrap's perldoc.
469    local $Text::Wrap::columns = $cols || COMMENT_COLS;
470    # Make words that are longer than COMMENT_COLS not wrap.
471    local $Text::Wrap::huge    = 'overflow';
472    # Don't mess with tabs.
473    local $Text::Wrap::unexpand = 0;
474
475    # If the line starts with ">", don't wrap it. Otherwise, wrap.
476    foreach my $line (split(/\r\n|\r|\n/, $comment)) {
477      if ($line =~ qr/^>/) {
478        $wrappedcomment .= ($line . "\n");
479      }
480      else {
481        $wrappedcomment .= (wrap('', '', $line) . "\n");
482      }
483    }
484
485    chomp($wrappedcomment); # Text::Wrap adds an extra newline at the end.
486    return $wrappedcomment;
487}
488
489sub find_wrap_point {
490    my ($string, $maxpos) = @_;
491    if (!$string) { return 0 }
492    if (length($string) < $maxpos) { return length($string) }
493    my $wrappoint = rindex($string, ",", $maxpos); # look for comma
494    if ($wrappoint <= 0) {  # can't find comma
495        $wrappoint = rindex($string, " ", $maxpos); # look for space
496        if ($wrappoint <= 0) {  # can't find space
497            $wrappoint = rindex($string, "-", $maxpos); # look for hyphen
498            if ($wrappoint <= 0) {  # can't find hyphen
499                $wrappoint = $maxpos;  # just truncate it
500            } else {
501                $wrappoint++; # leave hyphen on the left side
502            }
503        }
504    }
505    return $wrappoint;
506}
507
508sub join_activity_entries {
509    my ($field, $current_change, $new_change) = @_;
510    # We need to insert characters as these were removed by old
511    # LogActivityEntry code.
512
513    return $new_change if $current_change eq '';
514
515    # Buglists and see_also need the comma restored
516    if ($field eq 'dependson' || $field eq 'blocked' || $field eq 'see_also') {
517        if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') {
518            return $current_change . $new_change;
519        } else {
520            return $current_change . ', ' . $new_change;
521        }
522    }
523
524    # Assume bug_file_loc contain a single url, don't insert a delimiter
525    if ($field eq 'bug_file_loc') {
526        return $current_change . $new_change;
527    }
528
529    # All other fields get a space unless the first character of the second
530    # string is a comma or space
531    if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') {
532        return $current_change . $new_change;
533    } else {
534        return $current_change . ' ' . $new_change;
535    }
536}
537
538sub wrap_hard {
539    my ($string, $columns) = @_;
540    local $Text::Wrap::columns = $columns;
541    local $Text::Wrap::unexpand = 0;
542    local $Text::Wrap::huge = 'wrap';
543
544    my $wrapped = wrap('', '', $string);
545    chomp($wrapped);
546    return $wrapped;
547}
548
549sub format_time {
550    my ($date, $format, $timezone) = @_;
551
552    # If $format is not set, try to guess the correct date format.
553    if (!$format) {
554        if (!ref $date
555            && $date =~ /^(\d{4})[-\.](\d{2})[-\.](\d{2}) (\d{2}):(\d{2})(:(\d{2}))?$/)
556        {
557            my $sec = $7;
558            if (defined $sec) {
559                $format = "%Y-%m-%d %T %Z";
560            } else {
561                $format = "%Y-%m-%d %R %Z";
562            }
563        } else {
564            # Default date format. See DateTime for other formats available.
565            $format = "%Y-%m-%d %R %Z";
566        }
567    }
568
569    my $dt = ref $date ? $date : datetime_from($date, $timezone);
570    $date = defined $dt ? $dt->strftime($format) : '';
571    return trim($date);
572}
573
574sub datetime_from {
575    my ($date, $timezone) = @_;
576
577    # In the database, this is the "0" date.
578    return undef if $date =~ /^0000/;
579
580    my @time;
581    # Most dates will be in this format, avoid strptime's generic parser
582    if ($date =~ /^(\d{4})[\.-](\d{2})[\.-](\d{2})(?: (\d{2}):(\d{2}):(\d{2}))?$/) {
583        @time = ($6, $5, $4, $3, $2 - 1, $1 - 1900, undef);
584    }
585    else {
586        @time = strptime($date);
587    }
588
589    unless (scalar @time) {
590        # If an unknown timezone is passed (such as MSK, for Moskow),
591        # strptime() is unable to parse the date. We try again, but we first
592        # remove the timezone.
593        $date =~ s/\s+\S+$//;
594        @time = strptime($date);
595    }
596
597    return undef if !@time;
598
599    # strptime() counts years from 1900, except if they are older than 1901
600    # in which case it returns the full year (so 1890 -> 1890, but 1984 -> 84,
601    # and 3790 -> 1890). We make a guess and assume that 1100 <= year < 3000.
602    $time[5] += 1900 if $time[5] < 1100;
603
604    my %args = (
605        year   => $time[5],
606        # Months start from 0 (January).
607        month  => $time[4] + 1,
608        day    => $time[3],
609        hour   => $time[2],
610        minute => $time[1],
611        # DateTime doesn't like fractional seconds.
612        # Also, sometimes seconds are undef.
613        second => defined($time[0]) ? int($time[0]) : undef,
614        # If a timezone was specified, use it. Otherwise, use the
615        # local timezone.
616        time_zone => DateTime::TimeZone->offset_as_string($time[6])
617                     || Bugzilla->local_timezone,
618    );
619
620    # If something wasn't specified in the date, it's best to just not
621    # pass it to DateTime at all. (This is important for doing datetime_from
622    # on the deadline field, which is usually just a date with no time.)
623    foreach my $arg (keys %args) {
624        delete $args{$arg} if !defined $args{$arg};
625    }
626
627    # This module takes time to load and is only used here, so we
628    # |require| it here rather than |use| it.
629    require DateTime;
630    my $dt = new DateTime(\%args);
631
632    # Now display the date using the given timezone,
633    # or the user's timezone if none is given.
634    $dt->set_time_zone($timezone || Bugzilla->user->timezone);
635    return $dt;
636}
637
638sub bz_crypt {
639    my ($password, $salt) = @_;
640
641    my $algorithm;
642    if (!defined $salt) {
643        # If you don't use a salt, then people can create tables of
644        # hashes that map to particular passwords, and then break your
645        # hashing very easily if they have a large-enough table of common
646        # (or even uncommon) passwords. So we generate a unique salt for
647        # each password in the database, and then just prepend it to
648        # the hash.
649        $salt = generate_random_password(PASSWORD_SALT_LENGTH);
650        $algorithm = PASSWORD_DIGEST_ALGORITHM;
651    }
652
653    # We append the algorithm used to the string. This is good because then
654    # we can change the algorithm being used, in the future, without
655    # disrupting the validation of existing passwords. Also, this tells
656    # us if a password is using the old "crypt" method of hashing passwords,
657    # because the algorithm will be missing from the string.
658    if ($salt =~ /{([^}]+)}$/) {
659        $algorithm = $1;
660    }
661
662    # Wide characters cause crypt and Digest to die.
663    if (Bugzilla->params->{'utf8'}) {
664        utf8::encode($password) if utf8::is_utf8($password);
665    }
666
667    my $crypted_password;
668    if (!$algorithm) {
669        # Crypt the password.
670        $crypted_password = crypt($password, $salt);
671    }
672    else {
673        my $hasher = Digest->new($algorithm);
674        # Newly created salts won't yet have a comma.
675        ($salt) = $salt =~ /^([^,]+),?/;
676        $hasher->add($password, $salt);
677        $crypted_password = $salt . ',' . $hasher->b64digest . "{$algorithm}";
678    }
679
680    # Return the crypted password.
681    return $crypted_password;
682}
683
684# If you want to understand the security of strings generated by this
685# function, here's a quick formula that will help you estimate:
686# We pick from 62 characters, which is close to 64, which is 2^6.
687# So 8 characters is (2^6)^8 == 2^48 combinations. Just multiply 6
688# by the number of characters you generate, and that gets you the equivalent
689# strength of the string in bits.
690sub generate_random_password {
691    my $size = shift || 10; # default to 10 chars if nothing specified
692    return join("", map{ ('0'..'9','a'..'z','A'..'Z')[irand 62] } (1..$size));
693}
694
695sub validate_email_syntax {
696    my ($addr) = @_;
697    my $match = Bugzilla->params->{'emailregexp'};
698    my $email = $addr . Bugzilla->params->{'emailsuffix'};
699    # This regexp follows RFC 2822 section 3.4.1.
700    my $addr_spec = $Email::Address::addr_spec;
701    # RFC 2822 section 2.1 specifies that email addresses must
702    # be made of US-ASCII characters only.
703    # Email::Address::addr_spec doesn't enforce this.
704    # We set the max length to 127 to ensure addresses aren't truncated when
705    # inserted into the tokens.eventdata field.
706    if ($addr =~ /$match/
707        && $email !~ /\P{ASCII}/
708        && $email =~ /^$addr_spec$/
709        && length($email) <= 127)
710    {
711        # We assume these checks to suffice to consider the address untainted.
712        trick_taint($_[0]);
713        return 1;
714    }
715    return 0;
716}
717
718sub check_email_syntax {
719    my ($addr) = @_;
720
721    unless (validate_email_syntax(@_)) {
722        my $email = $addr . Bugzilla->params->{'emailsuffix'};
723        ThrowUserError('illegal_email_address', { addr => $email });
724    }
725}
726
727sub validate_date {
728    my ($date) = @_;
729    my $date2;
730
731    # $ts is undefined if the parser fails.
732    my $ts = str2time($date);
733    if ($ts) {
734        $date2 = time2str("%Y-%m-%d", $ts);
735
736        $date =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/;
737        $date2 =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/;
738    }
739    my $ret = ($ts && $date eq $date2);
740    return $ret ? 1 : 0;
741}
742
743sub validate_time {
744    my ($time) = @_;
745    my $time2;
746
747    # $ts is undefined if the parser fails.
748    my $ts = str2time($time);
749    if ($ts) {
750        $time2 = time2str("%H:%M:%S", $ts);
751        if ($time =~ /^(\d{1,2}):(\d\d)(?::(\d\d))?$/) {
752            $time = sprintf("%02d:%02d:%02d", $1, $2, $3 || 0);
753        }
754    }
755    my $ret = ($ts && $time eq $time2);
756    return $ret ? 1 : 0;
757}
758
759sub is_7bit_clean {
760    return $_[0] !~ /[^\x20-\x7E\x0A\x0D]/;
761}
762
763sub clean_text {
764    my $dtext = shift;
765    if ($dtext) {
766        # change control characters into a space
767        $dtext =~ s/[\x00-\x1F\x7F]+/ /g;
768    }
769    return trim($dtext);
770}
771
772sub on_main_db (&) {
773    my $code = shift;
774    my $original_dbh = Bugzilla->dbh;
775    Bugzilla->request_cache->{dbh} = Bugzilla->dbh_main;
776    $code->();
777    Bugzilla->request_cache->{dbh} = $original_dbh;
778}
779
780sub get_text {
781    my ($name, $vars) = @_;
782    my $template = Bugzilla->template_inner;
783    $vars ||= {};
784    $vars->{'message'} = $name;
785    my $message;
786    $template->process('global/message.txt.tmpl', $vars, \$message)
787      || ThrowTemplateError($template->error());
788
789    # Remove the indenting that exists in messages.html.tmpl.
790    $message =~ s/^    //gm;
791    return $message;
792}
793
794sub template_var {
795    my $name = shift;
796    my $request_cache = Bugzilla->request_cache;
797    my $cache = $request_cache->{util_template_var} ||= {};
798    my $lang = $request_cache->{template_current_lang}->[0] || '';
799    return $cache->{$lang}->{$name} if defined $cache->{$lang};
800
801    my $template = Bugzilla->template_inner($lang);
802    my %vars;
803    # Note: If we suddenly start needing a lot of template_var variables,
804    # they should move into their own template, not field-descs.
805    $template->process('global/field-descs.none.tmpl',
806                       { vars => \%vars, in_template_var => 1 })
807      || ThrowTemplateError($template->error());
808
809    $cache->{$lang} = \%vars;
810    return $vars{$name};
811}
812
813sub display_value {
814    my ($field, $value) = @_;
815    return template_var('value_descs')->{$field}->{$value} // $value;
816}
817
818sub disable_utf8 {
819    if (Bugzilla->params->{'utf8'}) {
820        binmode STDOUT, ':bytes'; # Turn off UTF8 encoding.
821    }
822}
823
824use constant UTF8_ACCIDENTAL => qw(shiftjis big5-eten euc-kr euc-jp);
825
826sub detect_encoding {
827    my $data = shift;
828
829    Bugzilla->feature('detect_charset')
830      || ThrowUserError('feature_disabled', { feature => 'detect_charset' });
831
832    require Encode::Detect::Detector;
833    import Encode::Detect::Detector 'detect';
834
835    my $encoding = detect($data);
836    $encoding = resolve_alias($encoding) if $encoding;
837
838    # Encode::Detect is bad at detecting certain charsets, but Encode::Guess
839    # is better at them. Here's the details:
840
841    # shiftjis, big5-eten, euc-kr, and euc-jp: (Encode::Detect
842    # tends to accidentally mis-detect UTF-8 strings as being
843    # these encodings.)
844    if ($encoding && grep($_ eq $encoding, UTF8_ACCIDENTAL)) {
845        $encoding = undef;
846        my $decoder = guess_encoding($data, UTF8_ACCIDENTAL);
847        $encoding = $decoder->name if ref $decoder;
848    }
849
850    # Encode::Detect sometimes mis-detects various ISO encodings as iso-8859-8,
851    # or cp1255, but Encode::Guess can usually tell which one it is.
852    if ($encoding && ($encoding eq 'iso-8859-8' || $encoding eq 'cp1255')) {
853        my $decoded_as = _guess_iso($data, 'iso-8859-8',
854            # These are ordered this way because it gives the most
855            # accurate results.
856            qw(cp1252 iso-8859-7 iso-8859-2));
857        $encoding = $decoded_as if $decoded_as;
858    }
859
860    return $encoding;
861}
862
863# A helper for detect_encoding.
864sub _guess_iso {
865    my ($data, $versus, @isos) = (shift, shift, shift);
866
867    my $encoding;
868    foreach my $iso (@isos) {
869        my $decoder = guess_encoding($data, ($iso, $versus));
870        if (ref $decoder) {
871            $encoding = $decoder->name if ref $decoder;
872            last;
873        }
874    }
875    return $encoding;
876}
877
8781;
879
880__END__
881
882=head1 NAME
883
884Bugzilla::Util - Generic utility functions for bugzilla
885
886=head1 SYNOPSIS
887
888  use Bugzilla::Util;
889
890  # Functions for dealing with variable tainting
891  trick_taint($var);
892  detaint_natural($var);
893  detaint_signed($var);
894
895  # Functions for quoting
896  html_quote($var);
897  url_quote($var);
898  xml_quote($var);
899  email_filter($var);
900
901  # Functions that tell you about your environment
902  my $is_cgi   = i_am_cgi();
903  my $is_webservice = i_am_webservice();
904  my $urlbase  = correct_urlbase();
905
906  # Data manipulation
907  ($removed, $added) = diff_arrays(\@old, \@new);
908
909  # Functions for manipulating strings
910  $val = trim(" abc ");
911  $wrapped = wrap_comment($comment);
912
913  # Functions for formatting time
914  format_time($time);
915  datetime_from($time, $timezone);
916
917  # Cryptographic Functions
918  $crypted_password = bz_crypt($password);
919  $new_password = generate_random_password($password_length);
920
921  # Validation Functions
922  validate_email_syntax($email);
923  check_email_syntax($email);
924  validate_date($date);
925
926  # DB-related functions
927  on_main_db {
928     ... code here ...
929  };
930
931=head1 DESCRIPTION
932
933This package contains various utility functions which do not belong anywhere
934else.
935
936B<It is not intended as a general dumping group for something which
937people feel might be useful somewhere, someday>. Do not add methods to this
938package unless it is intended to be used for a significant number of files,
939and it does not belong anywhere else.
940
941=head1 FUNCTIONS
942
943This package provides several types of routines:
944
945=head2 Tainting
946
947Several functions are available to deal with tainted variables. B<Use these
948with care> to avoid security holes.
949
950=over 4
951
952=item C<trick_taint($val)>
953
954Tricks perl into untainting a particular variable.
955
956Use trick_taint() when you know that there is no way that the data
957in a scalar can be tainted, but taint mode still bails on it.
958
959B<WARNING!! Using this routine on data that really could be tainted defeats
960the purpose of taint mode.  It should only be used on variables that have been
961sanity checked in some way and have been determined to be OK.>
962
963=item C<detaint_natural($num)>
964
965This routine detaints a natural number. It returns a true value if the
966value passed in was a valid natural number, else it returns false. You
967B<MUST> check the result of this routine to avoid security holes.
968
969=item C<detaint_signed($num)>
970
971This routine detaints a signed integer. It returns a true value if the
972value passed in was a valid signed integer, else it returns false. You
973B<MUST> check the result of this routine to avoid security holes.
974
975=back
976
977=head2 Quoting
978
979Some values may need to be quoted from perl. However, this should in general
980be done in the template where possible.
981
982=over 4
983
984=item C<html_quote($val)>
985
986Returns a value quoted for use in HTML, with &, E<lt>, E<gt>, E<34> and @ being
987replaced with their appropriate HTML entities.  Also, Unicode BiDi controls are
988deleted.
989
990=item C<html_light_quote($val)>
991
992Returns a string where only explicitly allowed HTML elements and attributes
993are kept. All HTML elements and attributes not being in the whitelist are either
994escaped (if HTML::Scrubber is not installed) or removed.
995
996=item C<url_quote($val)>
997
998Quotes characters so that they may be included as part of a url.
999
1000=item C<css_class_quote($val)>
1001
1002Quotes characters so that they may be used as CSS class names. Spaces
1003and forward slashes are replaced by underscores.
1004
1005=item C<xml_quote($val)>
1006
1007This is similar to C<html_quote>, except that ' is escaped to &apos;. This
1008is kept separate from html_quote partly for compatibility with previous code
1009(for &apos;) and partly for future handling of non-ASCII characters.
1010
1011=item C<email_filter>
1012
1013Removes the hostname from email addresses in the string, if the user
1014currently viewing Bugzilla is logged out. If the user is logged-in,
1015this filter just returns the input string.
1016
1017=back
1018
1019=head2 Environment and Location
1020
1021Functions returning information about your environment or location.
1022
1023=over 4
1024
1025=item C<i_am_cgi()>
1026
1027Tells you whether or not you are being run as a CGI script in a web
1028server. For example, it would return false if the caller is running
1029in a command-line script.
1030
1031=item C<i_am_webservice()>
1032
1033Tells you whether or not the current usage mode is WebServices related
1034such as JSONRPC, XMLRPC, or REST.
1035
1036=item C<correct_urlbase()>
1037
1038Returns either the C<sslbase> or C<urlbase> parameter, depending on the
1039current setting for the C<ssl_redirect> parameter.
1040
1041=item C<remote_ip()>
1042
1043Returns the IP address of the remote client. If Bugzilla is behind
1044a trusted proxy, it will get the remote IP address by looking at the
1045X-Forwarded-For header.
1046
1047=item C<validate_ip($ip)>
1048
1049Returns the sanitized IP address if it is a valid IPv4 or IPv6 address,
1050else returns undef.
1051
1052=item C<use_attachbase()>
1053
1054Returns true if an alternate host is used to display attachments; false
1055otherwise.
1056
1057=back
1058
1059=head2 Data Manipulation
1060
1061=over 4
1062
1063=item C<diff_arrays(\@old, \@new)>
1064
1065 Description: Takes two arrayrefs, and will tell you what it takes to
1066              get from @old to @new.
1067 Params:      @old = array that you are changing from
1068              @new = array that you are changing to
1069 Returns:     A list of two arrayrefs. The first is a reference to an
1070              array containing items that were removed from @old. The
1071              second is a reference to an array containing items
1072              that were added to @old. If both returned arrays are
1073              empty, @old and @new contain the same values.
1074
1075=back
1076
1077=head2 String Manipulation
1078
1079=over 4
1080
1081=item C<trim($str)>
1082
1083Removes any leading or trailing whitespace from a string. This routine does not
1084modify the existing string.
1085
1086=item C<wrap_hard($string, $size)>
1087
1088Wraps a string, so that a line is I<never> longer than C<$size>.
1089Returns the string, wrapped.
1090
1091=item C<wrap_comment($comment)>
1092
1093Takes a bug comment, and wraps it to the appropriate length. The length is
1094currently specified in C<Bugzilla::Constants::COMMENT_COLS>. Lines beginning
1095with ">" are assumed to be quotes, and they will not be wrapped.
1096
1097The intended use of this function is to wrap comments that are about to be
1098displayed or emailed. Generally, wrapped text should not be stored in the
1099database.
1100
1101=item C<find_wrap_point($string, $maxpos)>
1102
1103Search for a comma, a whitespace or a hyphen to split $string, within the first
1104$maxpos characters. If none of them is found, just split $string at $maxpos.
1105The search starts at $maxpos and goes back to the beginning of the string.
1106
1107=item C<join_activity_entries($field, $current_change, $new_change)>
1108
1109Joins two strings together so they appear as one. The field name is specified
1110as the method of joining the two strings depends on this. Returns the
1111combined string.
1112
1113=item C<is_7bit_clean($str)>
1114
1115Returns true is the string contains only 7-bit characters (ASCII 32 through 126,
1116ASCII 10 (LineFeed) and ASCII 13 (Carrage Return).
1117
1118=item C<disable_utf8()>
1119
1120Disable utf8 on STDOUT (and display raw data instead).
1121
1122=item C<detect_encoding($str)>
1123
1124Guesses what encoding a given data is encoded in, returning the canonical name
1125of the detected encoding (which may be different from the MIME charset
1126specification).
1127
1128=item C<clean_text($str)>
1129Returns the parameter "cleaned" by exchanging non-printable characters with spaces.
1130Specifically characters (ASCII 0 through 31) and (ASCII 127) will become ASCII 32 (Space).
1131
1132=item C<get_text>
1133
1134=over
1135
1136=item B<Description>
1137
1138This is a method of getting localized strings within Bugzilla code.
1139Use this when you don't want to display a whole template, you just
1140want a particular string.
1141
1142It uses the F<global/message.txt.tmpl> template to return a string.
1143
1144=item B<Params>
1145
1146=over
1147
1148=item C<$message> - The identifier for the message.
1149
1150=item C<$vars> - A hashref. Any variables you want to pass to the template.
1151
1152=back
1153
1154=item B<Returns>
1155
1156A string.
1157
1158=back
1159
1160
1161=item C<template_var>
1162
1163This is a method of getting the value of a variable from a template in
1164Perl code. The available variables are in the C<global/field-descs.none.tmpl>
1165template. Just pass in the name of the variable that you want the value of.
1166
1167
1168=back
1169
1170=head2 Formatting Time
1171
1172=over 4
1173
1174=item C<format_time($time)>
1175
1176Takes a time and converts it to the desired format and timezone.
1177If no format is given, the routine guesses the correct one and returns
1178an empty array if it cannot. If no timezone is given, the user's timezone
1179is used, as defined in their preferences.
1180
1181This routine is mainly called from templates to filter dates, see
1182"FILTER time" in L<Bugzilla::Template>.
1183
1184=item C<datetime_from($time, $timezone)>
1185
1186Returns a DateTime object given a date string. If the string is not in some
1187valid date format that C<strptime> understands, we return C<undef>.
1188
1189You can optionally specify a timezone for the returned date. If not
1190specified, defaults to the currently-logged-in user's timezone, or
1191the Bugzilla server's local timezone if there isn't a logged-in user.
1192
1193=back
1194
1195=head2 Cryptography
1196
1197=over 4
1198
1199=item C<bz_crypt($password, $salt)>
1200
1201Takes a string and returns a hashed (encrypted) value for it, using a
1202random salt. An optional salt string may also be passed in.
1203
1204Please always use this function instead of the built-in perl C<crypt>
1205function, when checking or setting a password. Bugzilla does not use
1206C<crypt>.
1207
1208=begin undocumented
1209
1210Random salts are generated because the alternative is usually
1211to use the first two characters of the password itself, and since
1212the salt appears in plaintext at the beginning of the encrypted
1213password string this has the effect of revealing the first two
1214characters of the password to anyone who views the encrypted version.
1215
1216=end undocumented
1217
1218=item C<generate_random_password($password_length)>
1219
1220Returns an alphanumeric string with the specified length
1221(10 characters by default). Use this function to generate passwords
1222and tokens.
1223
1224=back
1225
1226=head2 Validation
1227
1228=over 4
1229
1230=item C<validate_email_syntax($email)>
1231
1232Do a syntax checking for a legal email address and returns 1 if
1233the check is successful, else returns 0.
1234Untaints C<$email> if successful.
1235
1236=item C<check_email_syntax($email)>
1237
1238Do a syntax checking for a legal email address and throws an error
1239if the check fails.
1240Untaints C<$email> if successful.
1241
1242=item C<validate_date($date)>
1243
1244Make sure the date has the correct format and returns 1 if
1245the check is successful, else returns 0.
1246
1247=back
1248
1249=head2 Database
1250
1251=over
1252
1253=item C<on_main_db>
1254
1255Runs a block of code always on the main DB. Useful for when you're inside
1256a subroutine and need to do some writes to the database, but don't know
1257if Bugzilla is currently using the shadowdb or not. Used like:
1258
1259 on_main_db {
1260     my $dbh = Bugzilla->dbh;
1261     $dbh->do("INSERT ...");
1262 }
1263
1264=back
1265
1266=head1 B<Methods in need of POD>
1267
1268=over
1269
1270=item do_ssl_redirect_if_required
1271
1272=item validate_time
1273
1274=item is_ipv4
1275
1276=item is_ipv6
1277
1278=item display_value
1279
1280=back
1281