1# This Source Code Form is subject to the terms of the Mozilla Public 2# License, v. 2.0. If a copy of the MPL was not distributed with this 3# file, You can obtain one at http://mozilla.org/MPL/2.0/. 4# 5# This Source Code Form is "Incompatible With Secondary Licenses", as 6# defined by the Mozilla Public License, v. 2.0. 7 8package Bugzilla::Util; 9 10use 5.10.1; 11use strict; 12use warnings; 13 14use parent qw(Exporter); 15@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed 16 html_quote url_quote xml_quote 17 css_class_quote html_light_quote 18 i_am_cgi i_am_webservice correct_urlbase remote_ip 19 validate_ip do_ssl_redirect_if_required use_attachbase 20 diff_arrays on_main_db 21 trim wrap_hard wrap_comment find_wrap_point 22 format_time validate_date validate_time datetime_from 23 is_7bit_clean bz_crypt generate_random_password 24 validate_email_syntax check_email_syntax clean_text 25 get_text template_var display_value disable_utf8 26 detect_encoding email_filter 27 join_activity_entries read_text write_text); 28 29use Bugzilla::Constants; 30use Bugzilla::RNG qw(irand); 31use Bugzilla::Error; 32 33use Date::Parse; 34use Date::Format; 35use Digest; 36use Email::Address; 37use List::Util qw(first); 38use Scalar::Util qw(tainted blessed); 39use Text::Wrap; 40use Encode qw(encode decode resolve_alias); 41use Encode::Guess; 42use File::Basename qw(dirname); 43use File::Temp qw(tempfile); 44 45sub trick_taint { 46 require Carp; 47 Carp::confess("Undef to trick_taint") unless defined $_[0]; 48 my $match = $_[0] =~ /^(.*)$/s; 49 $_[0] = $match ? $1 : undef; 50 return (defined($_[0])); 51} 52 53sub detaint_natural { 54 my $match = $_[0] =~ /^([0-9]+)$/; 55 $_[0] = $match ? int($1) : undef; 56 return (defined($_[0])); 57} 58 59sub detaint_signed { 60 my $match = $_[0] =~ /^([-+]?[0-9]+)$/; 61 # The "int()" call removes any leading plus sign. 62 $_[0] = $match ? int($1) : undef; 63 return (defined($_[0])); 64} 65 66# Bug 120030: Override html filter to obscure the '@' in user 67# visible strings. 68# Bug 319331: Handle BiDi disruptions. 69sub html_quote { 70 my $var = shift; 71 $var =~ s/&/&/g; 72 $var =~ s/</</g; 73 $var =~ s/>/>/g; 74 $var =~ s/"/"/g; 75 # Obscure '@'. 76 $var =~ s/\@/\@/g; 77 78 state $use_utf8 = Bugzilla->params->{'utf8'}; 79 80 if ($use_utf8) { 81 # Remove control characters if the encoding is utf8. 82 # Other multibyte encodings may be using this range; so ignore if not utf8. 83 $var =~ s/(?![\t\r\n])[[:cntrl:]]//g; 84 85 # Remove the following characters because they're 86 # influencing BiDi: 87 # -------------------------------------------------------- 88 # |Code |Name |UTF-8 representation| 89 # |------|--------------------------|--------------------| 90 # |U+202a|Left-To-Right Embedding |0xe2 0x80 0xaa | 91 # |U+202b|Right-To-Left Embedding |0xe2 0x80 0xab | 92 # |U+202c|Pop Directional Formatting|0xe2 0x80 0xac | 93 # |U+202d|Left-To-Right Override |0xe2 0x80 0xad | 94 # |U+202e|Right-To-Left Override |0xe2 0x80 0xae | 95 # -------------------------------------------------------- 96 # 97 # The following are characters influencing BiDi, too, but 98 # they can be spared from filtering because they don't 99 # influence more than one character right or left: 100 # -------------------------------------------------------- 101 # |Code |Name |UTF-8 representation| 102 # |------|--------------------------|--------------------| 103 # |U+200e|Left-To-Right Mark |0xe2 0x80 0x8e | 104 # |U+200f|Right-To-Left Mark |0xe2 0x80 0x8f | 105 # -------------------------------------------------------- 106 $var =~ tr/\x{202a}-\x{202e}//d; 107 } 108 return $var; 109} 110 111sub read_text { 112 my ($filename) = @_; 113 open my $fh, '<:encoding(utf-8)', $filename; 114 local $/ = undef; 115 my $content = <$fh>; 116 close $fh; 117 return $content; 118} 119 120sub write_text { 121 my ($filename, $content) = @_; 122 my ($tmp_fh, $tmp_filename) = tempfile('.tmp.XXXXXXXXXX', 123 DIR => dirname($filename), 124 UNLINK => 0, 125 ); 126 binmode $tmp_fh, ':encoding(utf-8)'; 127 print $tmp_fh $content; 128 close $tmp_fh; 129 # File::Temp tries for secure files, but File::Slurp used the umask. 130 chmod(0666 & ~umask, $tmp_filename); 131 rename $tmp_filename, $filename; 132} 133 134sub html_light_quote { 135 my ($text) = @_; 136 # admin/table.html.tmpl calls |FILTER html_light| many times. 137 # There is no need to recreate the HTML::Scrubber object again and again. 138 my $scrubber = Bugzilla->process_cache->{html_scrubber}; 139 140 # List of allowed HTML elements having no attributes. 141 my @allow = qw(b strong em i u p br abbr acronym ins del cite code var 142 dfn samp kbd big small sub sup tt dd dt dl ul li ol 143 fieldset legend); 144 145 if (!Bugzilla->feature('html_desc')) { 146 my $safe = join('|', @allow); 147 my $chr = chr(1); 148 149 # First, escape safe elements. 150 $text =~ s#<($safe)>#$chr$1$chr#go; 151 $text =~ s#</($safe)>#$chr/$1$chr#go; 152 # Now filter < and >. 153 $text =~ s#<#<#g; 154 $text =~ s#>#>#g; 155 # Restore safe elements. 156 $text =~ s#$chr/($safe)$chr#</$1>#go; 157 $text =~ s#$chr($safe)$chr#<$1>#go; 158 return $text; 159 } 160 elsif (!$scrubber) { 161 # We can be less restrictive. We can accept elements with attributes. 162 push(@allow, qw(a blockquote q span)); 163 164 # Allowed protocols. 165 my $safe_protocols = join('|', SAFE_PROTOCOLS); 166 my $protocol_regexp = qr{(^(?:$safe_protocols):|^[^:]+$)}i; 167 168 # Deny all elements and attributes unless explicitly authorized. 169 my @default = (0 => { 170 id => 1, 171 name => 1, 172 class => 1, 173 '*' => 0, # Reject all other attributes. 174 } 175 ); 176 177 # Specific rules for allowed elements. If no specific rule is set 178 # for a given element, then the default is used. 179 my @rules = (a => { 180 href => $protocol_regexp, 181 target => qr{^(?:_blank|_parent|_self|_top)$}i, 182 title => 1, 183 id => 1, 184 name => 1, 185 class => 1, 186 '*' => 0, # Reject all other attributes. 187 }, 188 blockquote => { 189 cite => $protocol_regexp, 190 id => 1, 191 name => 1, 192 class => 1, 193 '*' => 0, # Reject all other attributes. 194 }, 195 'q' => { 196 cite => $protocol_regexp, 197 id => 1, 198 name => 1, 199 class => 1, 200 '*' => 0, # Reject all other attributes. 201 }, 202 ); 203 204 Bugzilla->process_cache->{html_scrubber} = $scrubber = 205 HTML::Scrubber->new(default => \@default, 206 allow => \@allow, 207 rules => \@rules, 208 comment => 0, 209 process => 0); 210 } 211 return $scrubber->scrub($text); 212} 213 214sub email_filter { 215 my ($toencode) = @_; 216 if (!Bugzilla->user->id) { 217 my @emails = Email::Address->parse($toencode); 218 if (scalar @emails) { 219 my @hosts = map { quotemeta($_->host) } @emails; 220 my $hosts_re = join('|', @hosts); 221 $toencode =~ s/\@(?:$hosts_re)//g; 222 return $toencode; 223 } 224 } 225 return $toencode; 226} 227 228# This originally came from CGI.pm, by Lincoln D. Stein 229sub url_quote { 230 my ($toencode) = (@_); 231 utf8::encode($toencode) # The below regex works only on bytes 232 if Bugzilla->params->{'utf8'} && utf8::is_utf8($toencode); 233 $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg; 234 return $toencode; 235} 236 237sub css_class_quote { 238 my ($toencode) = (@_); 239 $toencode =~ s#[ /]#_#g; 240 $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("&#x%x;",ord($1))/eg; 241 return $toencode; 242} 243 244sub xml_quote { 245 my ($var) = (@_); 246 $var =~ s/\&/\&/g; 247 $var =~ s/</\</g; 248 $var =~ s/>/\>/g; 249 $var =~ s/\"/\"/g; 250 $var =~ s/\'/\'/g; 251 252 # the following nukes characters disallowed by the XML 1.0 253 # spec, Production 2.2. 1.0 declares that only the following 254 # are valid: 255 # (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]) 256 $var =~ s/([\x{0001}-\x{0008}]| 257 [\x{000B}-\x{000C}]| 258 [\x{000E}-\x{001F}]| 259 [\x{D800}-\x{DFFF}]| 260 [\x{FFFE}-\x{FFFF}])//gx; 261 return $var; 262} 263 264sub i_am_cgi { 265 # I use SERVER_SOFTWARE because it's required to be 266 # defined for all requests in the CGI spec. 267 return exists $ENV{'SERVER_SOFTWARE'} ? 1 : 0; 268} 269 270sub i_am_webservice { 271 my $usage_mode = Bugzilla->usage_mode; 272 return $usage_mode == USAGE_MODE_XMLRPC 273 || $usage_mode == USAGE_MODE_JSON 274 || $usage_mode == USAGE_MODE_REST; 275} 276 277# This exists as a separate function from Bugzilla::CGI::redirect_to_https 278# because we don't want to create a CGI object during XML-RPC calls 279# (doing so can mess up XML-RPC). 280sub do_ssl_redirect_if_required { 281 return if !i_am_cgi(); 282 return if !Bugzilla->params->{'ssl_redirect'}; 283 284 my $sslbase = Bugzilla->params->{'sslbase'}; 285 286 # If we're already running under SSL, never redirect. 287 return if uc($ENV{HTTPS} || '') eq 'ON'; 288 # Never redirect if there isn't an sslbase. 289 return if !$sslbase; 290 Bugzilla->cgi->redirect_to_https(); 291} 292 293sub correct_urlbase { 294 my $ssl = Bugzilla->params->{'ssl_redirect'}; 295 my $urlbase = Bugzilla->params->{'urlbase'}; 296 my $sslbase = Bugzilla->params->{'sslbase'}; 297 298 if (!$sslbase) { 299 return $urlbase; 300 } 301 elsif ($ssl) { 302 return $sslbase; 303 } 304 else { 305 # Return what the user currently uses. 306 return (uc($ENV{HTTPS} || '') eq 'ON') ? $sslbase : $urlbase; 307 } 308} 309 310sub remote_ip { 311 my $ip = $ENV{'REMOTE_ADDR'} || '127.0.0.1'; 312 my @proxies = split(/[\s,]+/, Bugzilla->params->{'inbound_proxies'}); 313 314 # If the IP address is one of our trusted proxies, then we look at 315 # the X-Forwarded-For header to determine the real remote IP address. 316 if ($ENV{'HTTP_X_FORWARDED_FOR'} && first { $_ eq $ip } @proxies) { 317 my @ips = split(/[\s,]+/, $ENV{'HTTP_X_FORWARDED_FOR'}); 318 # This header can contain several IP addresses. We want the 319 # IP address of the machine which connected to our proxies as 320 # all other IP addresses may be fake or internal ones. 321 # Note that this may block a whole external proxy, but we have 322 # no way to determine if this proxy is malicious or trustable. 323 foreach my $remote_ip (reverse @ips) { 324 if (!first { $_ eq $remote_ip } @proxies) { 325 # Keep the original IP address if the remote IP is invalid. 326 $ip = validate_ip($remote_ip) || $ip; 327 last; 328 } 329 } 330 } 331 return $ip; 332} 333 334sub validate_ip { 335 my $ip = shift; 336 return is_ipv4($ip) || is_ipv6($ip); 337} 338 339# Copied from Data::Validate::IP::is_ipv4(). 340sub is_ipv4 { 341 my $ip = shift; 342 return unless defined $ip; 343 344 my @octets = $ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/; 345 return unless scalar(@octets) == 4; 346 347 foreach my $octet (@octets) { 348 return unless ($octet >= 0 && $octet <= 255 && $octet !~ /^0\d{1,2}$/); 349 } 350 351 # The IP address is valid and can now be detainted. 352 return join('.', @octets); 353} 354 355# Copied from Data::Validate::IP::is_ipv6(). 356sub is_ipv6 { 357 my $ip = shift; 358 return unless defined $ip; 359 360 # If there is a :: then there must be only one :: and the length 361 # can be variable. Without it, the length must be 8 groups. 362 my @chunks = split(':', $ip); 363 364 # Need to check if the last chunk is an IPv4 address, if it is we 365 # pop it off and exempt it from the normal IPv6 checking and stick 366 # it back on at the end. If there is only one chunk and it's an IPv4 367 # address, then it isn't an IPv6 address. 368 my $ipv4; 369 my $expected_chunks = 8; 370 if (@chunks > 1 && is_ipv4($chunks[$#chunks])) { 371 $ipv4 = pop(@chunks); 372 $expected_chunks--; 373 } 374 375 my $empty = 0; 376 # Workaround to handle trailing :: being valid. 377 if ($ip =~ /[0-9a-f]{1,4}::$/) { 378 $empty++; 379 # Single trailing ':' is invalid. 380 } elsif ($ip =~ /:$/) { 381 return; 382 } 383 384 foreach my $chunk (@chunks) { 385 return unless $chunk =~ /^[0-9a-f]{0,4}$/i; 386 $empty++ if $chunk eq ''; 387 } 388 # More than one :: block is bad, but if it starts with :: it will 389 # look like two, so we need an exception. 390 if ($empty == 2 && $ip =~ /^::/) { 391 # This is ok 392 } elsif ($empty > 1) { 393 return; 394 } 395 396 push(@chunks, $ipv4) if $ipv4; 397 # Need 8 chunks, or we need an empty section that could be filled 398 # to represent the missing '0' sections. 399 return unless (@chunks == $expected_chunks || @chunks < $expected_chunks && $empty); 400 401 my $ipv6 = join(':', @chunks); 402 # The IP address is valid and can now be detainted. 403 trick_taint($ipv6); 404 405 # Need to handle the exception of trailing :: being valid. 406 return "${ipv6}::" if $ip =~ /::$/; 407 return $ipv6; 408} 409 410sub use_attachbase { 411 my $attachbase = Bugzilla->params->{'attachment_base'}; 412 return ($attachbase ne '' 413 && $attachbase ne Bugzilla->params->{'urlbase'} 414 && $attachbase ne Bugzilla->params->{'sslbase'}) ? 1 : 0; 415} 416 417sub diff_arrays { 418 my ($old_ref, $new_ref, $attrib) = @_; 419 $attrib ||= 'name'; 420 421 my (%counts, %pos); 422 # We are going to alter the old array. 423 my @old = @$old_ref; 424 my $i = 0; 425 426 # $counts{foo}-- means old, $counts{foo}++ means new. 427 # If $counts{foo} becomes positive, then we are adding new items, 428 # else we simply cancel one old existing item. Remaining items 429 # in the old list have been removed. 430 foreach (@old) { 431 next unless defined $_; 432 my $value = blessed($_) ? $_->$attrib : $_; 433 $counts{$value}--; 434 push @{$pos{$value}}, $i++; 435 } 436 my @added; 437 foreach (@$new_ref) { 438 next unless defined $_; 439 my $value = blessed($_) ? $_->$attrib : $_; 440 if (++$counts{$value} > 0) { 441 # Ignore empty strings, but objects having an empty string 442 # as attribute are fine. 443 push(@added, $_) unless ($value eq '' && !blessed($_)); 444 } 445 else { 446 my $old_pos = shift @{$pos{$value}}; 447 $old[$old_pos] = undef; 448 } 449 } 450 # Ignore canceled items as well as empty strings. 451 my @removed = grep { defined $_ && $_ ne '' } @old; 452 return (\@removed, \@added); 453} 454 455sub trim { 456 my ($str) = @_; 457 if ($str) { 458 $str =~ s/^\s+//g; 459 $str =~ s/\s+$//g; 460 } 461 return $str; 462} 463 464sub wrap_comment { 465 my ($comment, $cols) = @_; 466 my $wrappedcomment = ""; 467 468 # Use 'local', as recommended by Text::Wrap's perldoc. 469 local $Text::Wrap::columns = $cols || COMMENT_COLS; 470 # Make words that are longer than COMMENT_COLS not wrap. 471 local $Text::Wrap::huge = 'overflow'; 472 # Don't mess with tabs. 473 local $Text::Wrap::unexpand = 0; 474 475 # If the line starts with ">", don't wrap it. Otherwise, wrap. 476 foreach my $line (split(/\r\n|\r|\n/, $comment)) { 477 if ($line =~ qr/^>/) { 478 $wrappedcomment .= ($line . "\n"); 479 } 480 else { 481 $wrappedcomment .= (wrap('', '', $line) . "\n"); 482 } 483 } 484 485 chomp($wrappedcomment); # Text::Wrap adds an extra newline at the end. 486 return $wrappedcomment; 487} 488 489sub find_wrap_point { 490 my ($string, $maxpos) = @_; 491 if (!$string) { return 0 } 492 if (length($string) < $maxpos) { return length($string) } 493 my $wrappoint = rindex($string, ",", $maxpos); # look for comma 494 if ($wrappoint <= 0) { # can't find comma 495 $wrappoint = rindex($string, " ", $maxpos); # look for space 496 if ($wrappoint <= 0) { # can't find space 497 $wrappoint = rindex($string, "-", $maxpos); # look for hyphen 498 if ($wrappoint <= 0) { # can't find hyphen 499 $wrappoint = $maxpos; # just truncate it 500 } else { 501 $wrappoint++; # leave hyphen on the left side 502 } 503 } 504 } 505 return $wrappoint; 506} 507 508sub join_activity_entries { 509 my ($field, $current_change, $new_change) = @_; 510 # We need to insert characters as these were removed by old 511 # LogActivityEntry code. 512 513 return $new_change if $current_change eq ''; 514 515 # Buglists and see_also need the comma restored 516 if ($field eq 'dependson' || $field eq 'blocked' || $field eq 'see_also') { 517 if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') { 518 return $current_change . $new_change; 519 } else { 520 return $current_change . ', ' . $new_change; 521 } 522 } 523 524 # Assume bug_file_loc contain a single url, don't insert a delimiter 525 if ($field eq 'bug_file_loc') { 526 return $current_change . $new_change; 527 } 528 529 # All other fields get a space unless the first character of the second 530 # string is a comma or space 531 if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') { 532 return $current_change . $new_change; 533 } else { 534 return $current_change . ' ' . $new_change; 535 } 536} 537 538sub wrap_hard { 539 my ($string, $columns) = @_; 540 local $Text::Wrap::columns = $columns; 541 local $Text::Wrap::unexpand = 0; 542 local $Text::Wrap::huge = 'wrap'; 543 544 my $wrapped = wrap('', '', $string); 545 chomp($wrapped); 546 return $wrapped; 547} 548 549sub format_time { 550 my ($date, $format, $timezone) = @_; 551 552 # If $format is not set, try to guess the correct date format. 553 if (!$format) { 554 if (!ref $date 555 && $date =~ /^(\d{4})[-\.](\d{2})[-\.](\d{2}) (\d{2}):(\d{2})(:(\d{2}))?$/) 556 { 557 my $sec = $7; 558 if (defined $sec) { 559 $format = "%Y-%m-%d %T %Z"; 560 } else { 561 $format = "%Y-%m-%d %R %Z"; 562 } 563 } else { 564 # Default date format. See DateTime for other formats available. 565 $format = "%Y-%m-%d %R %Z"; 566 } 567 } 568 569 my $dt = ref $date ? $date : datetime_from($date, $timezone); 570 $date = defined $dt ? $dt->strftime($format) : ''; 571 return trim($date); 572} 573 574sub datetime_from { 575 my ($date, $timezone) = @_; 576 577 # In the database, this is the "0" date. 578 return undef if $date =~ /^0000/; 579 580 my @time; 581 # Most dates will be in this format, avoid strptime's generic parser 582 if ($date =~ /^(\d{4})[\.-](\d{2})[\.-](\d{2})(?: (\d{2}):(\d{2}):(\d{2}))?$/) { 583 @time = ($6, $5, $4, $3, $2 - 1, $1 - 1900, undef); 584 } 585 else { 586 @time = strptime($date); 587 } 588 589 unless (scalar @time) { 590 # If an unknown timezone is passed (such as MSK, for Moskow), 591 # strptime() is unable to parse the date. We try again, but we first 592 # remove the timezone. 593 $date =~ s/\s+\S+$//; 594 @time = strptime($date); 595 } 596 597 return undef if !@time; 598 599 # strptime() counts years from 1900, except if they are older than 1901 600 # in which case it returns the full year (so 1890 -> 1890, but 1984 -> 84, 601 # and 3790 -> 1890). We make a guess and assume that 1100 <= year < 3000. 602 $time[5] += 1900 if $time[5] < 1100; 603 604 my %args = ( 605 year => $time[5], 606 # Months start from 0 (January). 607 month => $time[4] + 1, 608 day => $time[3], 609 hour => $time[2], 610 minute => $time[1], 611 # DateTime doesn't like fractional seconds. 612 # Also, sometimes seconds are undef. 613 second => defined($time[0]) ? int($time[0]) : undef, 614 # If a timezone was specified, use it. Otherwise, use the 615 # local timezone. 616 time_zone => DateTime::TimeZone->offset_as_string($time[6]) 617 || Bugzilla->local_timezone, 618 ); 619 620 # If something wasn't specified in the date, it's best to just not 621 # pass it to DateTime at all. (This is important for doing datetime_from 622 # on the deadline field, which is usually just a date with no time.) 623 foreach my $arg (keys %args) { 624 delete $args{$arg} if !defined $args{$arg}; 625 } 626 627 # This module takes time to load and is only used here, so we 628 # |require| it here rather than |use| it. 629 require DateTime; 630 my $dt = new DateTime(\%args); 631 632 # Now display the date using the given timezone, 633 # or the user's timezone if none is given. 634 $dt->set_time_zone($timezone || Bugzilla->user->timezone); 635 return $dt; 636} 637 638sub bz_crypt { 639 my ($password, $salt) = @_; 640 641 my $algorithm; 642 if (!defined $salt) { 643 # If you don't use a salt, then people can create tables of 644 # hashes that map to particular passwords, and then break your 645 # hashing very easily if they have a large-enough table of common 646 # (or even uncommon) passwords. So we generate a unique salt for 647 # each password in the database, and then just prepend it to 648 # the hash. 649 $salt = generate_random_password(PASSWORD_SALT_LENGTH); 650 $algorithm = PASSWORD_DIGEST_ALGORITHM; 651 } 652 653 # We append the algorithm used to the string. This is good because then 654 # we can change the algorithm being used, in the future, without 655 # disrupting the validation of existing passwords. Also, this tells 656 # us if a password is using the old "crypt" method of hashing passwords, 657 # because the algorithm will be missing from the string. 658 if ($salt =~ /{([^}]+)}$/) { 659 $algorithm = $1; 660 } 661 662 # Wide characters cause crypt and Digest to die. 663 if (Bugzilla->params->{'utf8'}) { 664 utf8::encode($password) if utf8::is_utf8($password); 665 } 666 667 my $crypted_password; 668 if (!$algorithm) { 669 # Crypt the password. 670 $crypted_password = crypt($password, $salt); 671 } 672 else { 673 my $hasher = Digest->new($algorithm); 674 # Newly created salts won't yet have a comma. 675 ($salt) = $salt =~ /^([^,]+),?/; 676 $hasher->add($password, $salt); 677 $crypted_password = $salt . ',' . $hasher->b64digest . "{$algorithm}"; 678 } 679 680 # Return the crypted password. 681 return $crypted_password; 682} 683 684# If you want to understand the security of strings generated by this 685# function, here's a quick formula that will help you estimate: 686# We pick from 62 characters, which is close to 64, which is 2^6. 687# So 8 characters is (2^6)^8 == 2^48 combinations. Just multiply 6 688# by the number of characters you generate, and that gets you the equivalent 689# strength of the string in bits. 690sub generate_random_password { 691 my $size = shift || 10; # default to 10 chars if nothing specified 692 return join("", map{ ('0'..'9','a'..'z','A'..'Z')[irand 62] } (1..$size)); 693} 694 695sub validate_email_syntax { 696 my ($addr) = @_; 697 my $match = Bugzilla->params->{'emailregexp'}; 698 my $email = $addr . Bugzilla->params->{'emailsuffix'}; 699 # This regexp follows RFC 2822 section 3.4.1. 700 my $addr_spec = $Email::Address::addr_spec; 701 # RFC 2822 section 2.1 specifies that email addresses must 702 # be made of US-ASCII characters only. 703 # Email::Address::addr_spec doesn't enforce this. 704 # We set the max length to 127 to ensure addresses aren't truncated when 705 # inserted into the tokens.eventdata field. 706 if ($addr =~ /$match/ 707 && $email !~ /\P{ASCII}/ 708 && $email =~ /^$addr_spec$/ 709 && length($email) <= 127) 710 { 711 # We assume these checks to suffice to consider the address untainted. 712 trick_taint($_[0]); 713 return 1; 714 } 715 return 0; 716} 717 718sub check_email_syntax { 719 my ($addr) = @_; 720 721 unless (validate_email_syntax(@_)) { 722 my $email = $addr . Bugzilla->params->{'emailsuffix'}; 723 ThrowUserError('illegal_email_address', { addr => $email }); 724 } 725} 726 727sub validate_date { 728 my ($date) = @_; 729 my $date2; 730 731 # $ts is undefined if the parser fails. 732 my $ts = str2time($date); 733 if ($ts) { 734 $date2 = time2str("%Y-%m-%d", $ts); 735 736 $date =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/; 737 $date2 =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/; 738 } 739 my $ret = ($ts && $date eq $date2); 740 return $ret ? 1 : 0; 741} 742 743sub validate_time { 744 my ($time) = @_; 745 my $time2; 746 747 # $ts is undefined if the parser fails. 748 my $ts = str2time($time); 749 if ($ts) { 750 $time2 = time2str("%H:%M:%S", $ts); 751 if ($time =~ /^(\d{1,2}):(\d\d)(?::(\d\d))?$/) { 752 $time = sprintf("%02d:%02d:%02d", $1, $2, $3 || 0); 753 } 754 } 755 my $ret = ($ts && $time eq $time2); 756 return $ret ? 1 : 0; 757} 758 759sub is_7bit_clean { 760 return $_[0] !~ /[^\x20-\x7E\x0A\x0D]/; 761} 762 763sub clean_text { 764 my $dtext = shift; 765 if ($dtext) { 766 # change control characters into a space 767 $dtext =~ s/[\x00-\x1F\x7F]+/ /g; 768 } 769 return trim($dtext); 770} 771 772sub on_main_db (&) { 773 my $code = shift; 774 my $original_dbh = Bugzilla->dbh; 775 Bugzilla->request_cache->{dbh} = Bugzilla->dbh_main; 776 $code->(); 777 Bugzilla->request_cache->{dbh} = $original_dbh; 778} 779 780sub get_text { 781 my ($name, $vars) = @_; 782 my $template = Bugzilla->template_inner; 783 $vars ||= {}; 784 $vars->{'message'} = $name; 785 my $message; 786 $template->process('global/message.txt.tmpl', $vars, \$message) 787 || ThrowTemplateError($template->error()); 788 789 # Remove the indenting that exists in messages.html.tmpl. 790 $message =~ s/^ //gm; 791 return $message; 792} 793 794sub template_var { 795 my $name = shift; 796 my $request_cache = Bugzilla->request_cache; 797 my $cache = $request_cache->{util_template_var} ||= {}; 798 my $lang = $request_cache->{template_current_lang}->[0] || ''; 799 return $cache->{$lang}->{$name} if defined $cache->{$lang}; 800 801 my $template = Bugzilla->template_inner($lang); 802 my %vars; 803 # Note: If we suddenly start needing a lot of template_var variables, 804 # they should move into their own template, not field-descs. 805 $template->process('global/field-descs.none.tmpl', 806 { vars => \%vars, in_template_var => 1 }) 807 || ThrowTemplateError($template->error()); 808 809 $cache->{$lang} = \%vars; 810 return $vars{$name}; 811} 812 813sub display_value { 814 my ($field, $value) = @_; 815 return template_var('value_descs')->{$field}->{$value} // $value; 816} 817 818sub disable_utf8 { 819 if (Bugzilla->params->{'utf8'}) { 820 binmode STDOUT, ':bytes'; # Turn off UTF8 encoding. 821 } 822} 823 824use constant UTF8_ACCIDENTAL => qw(shiftjis big5-eten euc-kr euc-jp); 825 826sub detect_encoding { 827 my $data = shift; 828 829 Bugzilla->feature('detect_charset') 830 || ThrowUserError('feature_disabled', { feature => 'detect_charset' }); 831 832 require Encode::Detect::Detector; 833 import Encode::Detect::Detector 'detect'; 834 835 my $encoding = detect($data); 836 $encoding = resolve_alias($encoding) if $encoding; 837 838 # Encode::Detect is bad at detecting certain charsets, but Encode::Guess 839 # is better at them. Here's the details: 840 841 # shiftjis, big5-eten, euc-kr, and euc-jp: (Encode::Detect 842 # tends to accidentally mis-detect UTF-8 strings as being 843 # these encodings.) 844 if ($encoding && grep($_ eq $encoding, UTF8_ACCIDENTAL)) { 845 $encoding = undef; 846 my $decoder = guess_encoding($data, UTF8_ACCIDENTAL); 847 $encoding = $decoder->name if ref $decoder; 848 } 849 850 # Encode::Detect sometimes mis-detects various ISO encodings as iso-8859-8, 851 # or cp1255, but Encode::Guess can usually tell which one it is. 852 if ($encoding && ($encoding eq 'iso-8859-8' || $encoding eq 'cp1255')) { 853 my $decoded_as = _guess_iso($data, 'iso-8859-8', 854 # These are ordered this way because it gives the most 855 # accurate results. 856 qw(cp1252 iso-8859-7 iso-8859-2)); 857 $encoding = $decoded_as if $decoded_as; 858 } 859 860 return $encoding; 861} 862 863# A helper for detect_encoding. 864sub _guess_iso { 865 my ($data, $versus, @isos) = (shift, shift, shift); 866 867 my $encoding; 868 foreach my $iso (@isos) { 869 my $decoder = guess_encoding($data, ($iso, $versus)); 870 if (ref $decoder) { 871 $encoding = $decoder->name if ref $decoder; 872 last; 873 } 874 } 875 return $encoding; 876} 877 8781; 879 880__END__ 881 882=head1 NAME 883 884Bugzilla::Util - Generic utility functions for bugzilla 885 886=head1 SYNOPSIS 887 888 use Bugzilla::Util; 889 890 # Functions for dealing with variable tainting 891 trick_taint($var); 892 detaint_natural($var); 893 detaint_signed($var); 894 895 # Functions for quoting 896 html_quote($var); 897 url_quote($var); 898 xml_quote($var); 899 email_filter($var); 900 901 # Functions that tell you about your environment 902 my $is_cgi = i_am_cgi(); 903 my $is_webservice = i_am_webservice(); 904 my $urlbase = correct_urlbase(); 905 906 # Data manipulation 907 ($removed, $added) = diff_arrays(\@old, \@new); 908 909 # Functions for manipulating strings 910 $val = trim(" abc "); 911 $wrapped = wrap_comment($comment); 912 913 # Functions for formatting time 914 format_time($time); 915 datetime_from($time, $timezone); 916 917 # Cryptographic Functions 918 $crypted_password = bz_crypt($password); 919 $new_password = generate_random_password($password_length); 920 921 # Validation Functions 922 validate_email_syntax($email); 923 check_email_syntax($email); 924 validate_date($date); 925 926 # DB-related functions 927 on_main_db { 928 ... code here ... 929 }; 930 931=head1 DESCRIPTION 932 933This package contains various utility functions which do not belong anywhere 934else. 935 936B<It is not intended as a general dumping group for something which 937people feel might be useful somewhere, someday>. Do not add methods to this 938package unless it is intended to be used for a significant number of files, 939and it does not belong anywhere else. 940 941=head1 FUNCTIONS 942 943This package provides several types of routines: 944 945=head2 Tainting 946 947Several functions are available to deal with tainted variables. B<Use these 948with care> to avoid security holes. 949 950=over 4 951 952=item C<trick_taint($val)> 953 954Tricks perl into untainting a particular variable. 955 956Use trick_taint() when you know that there is no way that the data 957in a scalar can be tainted, but taint mode still bails on it. 958 959B<WARNING!! Using this routine on data that really could be tainted defeats 960the purpose of taint mode. It should only be used on variables that have been 961sanity checked in some way and have been determined to be OK.> 962 963=item C<detaint_natural($num)> 964 965This routine detaints a natural number. It returns a true value if the 966value passed in was a valid natural number, else it returns false. You 967B<MUST> check the result of this routine to avoid security holes. 968 969=item C<detaint_signed($num)> 970 971This routine detaints a signed integer. It returns a true value if the 972value passed in was a valid signed integer, else it returns false. You 973B<MUST> check the result of this routine to avoid security holes. 974 975=back 976 977=head2 Quoting 978 979Some values may need to be quoted from perl. However, this should in general 980be done in the template where possible. 981 982=over 4 983 984=item C<html_quote($val)> 985 986Returns a value quoted for use in HTML, with &, E<lt>, E<gt>, E<34> and @ being 987replaced with their appropriate HTML entities. Also, Unicode BiDi controls are 988deleted. 989 990=item C<html_light_quote($val)> 991 992Returns a string where only explicitly allowed HTML elements and attributes 993are kept. All HTML elements and attributes not being in the whitelist are either 994escaped (if HTML::Scrubber is not installed) or removed. 995 996=item C<url_quote($val)> 997 998Quotes characters so that they may be included as part of a url. 999 1000=item C<css_class_quote($val)> 1001 1002Quotes characters so that they may be used as CSS class names. Spaces 1003and forward slashes are replaced by underscores. 1004 1005=item C<xml_quote($val)> 1006 1007This is similar to C<html_quote>, except that ' is escaped to '. This 1008is kept separate from html_quote partly for compatibility with previous code 1009(for ') and partly for future handling of non-ASCII characters. 1010 1011=item C<email_filter> 1012 1013Removes the hostname from email addresses in the string, if the user 1014currently viewing Bugzilla is logged out. If the user is logged-in, 1015this filter just returns the input string. 1016 1017=back 1018 1019=head2 Environment and Location 1020 1021Functions returning information about your environment or location. 1022 1023=over 4 1024 1025=item C<i_am_cgi()> 1026 1027Tells you whether or not you are being run as a CGI script in a web 1028server. For example, it would return false if the caller is running 1029in a command-line script. 1030 1031=item C<i_am_webservice()> 1032 1033Tells you whether or not the current usage mode is WebServices related 1034such as JSONRPC, XMLRPC, or REST. 1035 1036=item C<correct_urlbase()> 1037 1038Returns either the C<sslbase> or C<urlbase> parameter, depending on the 1039current setting for the C<ssl_redirect> parameter. 1040 1041=item C<remote_ip()> 1042 1043Returns the IP address of the remote client. If Bugzilla is behind 1044a trusted proxy, it will get the remote IP address by looking at the 1045X-Forwarded-For header. 1046 1047=item C<validate_ip($ip)> 1048 1049Returns the sanitized IP address if it is a valid IPv4 or IPv6 address, 1050else returns undef. 1051 1052=item C<use_attachbase()> 1053 1054Returns true if an alternate host is used to display attachments; false 1055otherwise. 1056 1057=back 1058 1059=head2 Data Manipulation 1060 1061=over 4 1062 1063=item C<diff_arrays(\@old, \@new)> 1064 1065 Description: Takes two arrayrefs, and will tell you what it takes to 1066 get from @old to @new. 1067 Params: @old = array that you are changing from 1068 @new = array that you are changing to 1069 Returns: A list of two arrayrefs. The first is a reference to an 1070 array containing items that were removed from @old. The 1071 second is a reference to an array containing items 1072 that were added to @old. If both returned arrays are 1073 empty, @old and @new contain the same values. 1074 1075=back 1076 1077=head2 String Manipulation 1078 1079=over 4 1080 1081=item C<trim($str)> 1082 1083Removes any leading or trailing whitespace from a string. This routine does not 1084modify the existing string. 1085 1086=item C<wrap_hard($string, $size)> 1087 1088Wraps a string, so that a line is I<never> longer than C<$size>. 1089Returns the string, wrapped. 1090 1091=item C<wrap_comment($comment)> 1092 1093Takes a bug comment, and wraps it to the appropriate length. The length is 1094currently specified in C<Bugzilla::Constants::COMMENT_COLS>. Lines beginning 1095with ">" are assumed to be quotes, and they will not be wrapped. 1096 1097The intended use of this function is to wrap comments that are about to be 1098displayed or emailed. Generally, wrapped text should not be stored in the 1099database. 1100 1101=item C<find_wrap_point($string, $maxpos)> 1102 1103Search for a comma, a whitespace or a hyphen to split $string, within the first 1104$maxpos characters. If none of them is found, just split $string at $maxpos. 1105The search starts at $maxpos and goes back to the beginning of the string. 1106 1107=item C<join_activity_entries($field, $current_change, $new_change)> 1108 1109Joins two strings together so they appear as one. The field name is specified 1110as the method of joining the two strings depends on this. Returns the 1111combined string. 1112 1113=item C<is_7bit_clean($str)> 1114 1115Returns true is the string contains only 7-bit characters (ASCII 32 through 126, 1116ASCII 10 (LineFeed) and ASCII 13 (Carrage Return). 1117 1118=item C<disable_utf8()> 1119 1120Disable utf8 on STDOUT (and display raw data instead). 1121 1122=item C<detect_encoding($str)> 1123 1124Guesses what encoding a given data is encoded in, returning the canonical name 1125of the detected encoding (which may be different from the MIME charset 1126specification). 1127 1128=item C<clean_text($str)> 1129Returns the parameter "cleaned" by exchanging non-printable characters with spaces. 1130Specifically characters (ASCII 0 through 31) and (ASCII 127) will become ASCII 32 (Space). 1131 1132=item C<get_text> 1133 1134=over 1135 1136=item B<Description> 1137 1138This is a method of getting localized strings within Bugzilla code. 1139Use this when you don't want to display a whole template, you just 1140want a particular string. 1141 1142It uses the F<global/message.txt.tmpl> template to return a string. 1143 1144=item B<Params> 1145 1146=over 1147 1148=item C<$message> - The identifier for the message. 1149 1150=item C<$vars> - A hashref. Any variables you want to pass to the template. 1151 1152=back 1153 1154=item B<Returns> 1155 1156A string. 1157 1158=back 1159 1160 1161=item C<template_var> 1162 1163This is a method of getting the value of a variable from a template in 1164Perl code. The available variables are in the C<global/field-descs.none.tmpl> 1165template. Just pass in the name of the variable that you want the value of. 1166 1167 1168=back 1169 1170=head2 Formatting Time 1171 1172=over 4 1173 1174=item C<format_time($time)> 1175 1176Takes a time and converts it to the desired format and timezone. 1177If no format is given, the routine guesses the correct one and returns 1178an empty array if it cannot. If no timezone is given, the user's timezone 1179is used, as defined in their preferences. 1180 1181This routine is mainly called from templates to filter dates, see 1182"FILTER time" in L<Bugzilla::Template>. 1183 1184=item C<datetime_from($time, $timezone)> 1185 1186Returns a DateTime object given a date string. If the string is not in some 1187valid date format that C<strptime> understands, we return C<undef>. 1188 1189You can optionally specify a timezone for the returned date. If not 1190specified, defaults to the currently-logged-in user's timezone, or 1191the Bugzilla server's local timezone if there isn't a logged-in user. 1192 1193=back 1194 1195=head2 Cryptography 1196 1197=over 4 1198 1199=item C<bz_crypt($password, $salt)> 1200 1201Takes a string and returns a hashed (encrypted) value for it, using a 1202random salt. An optional salt string may also be passed in. 1203 1204Please always use this function instead of the built-in perl C<crypt> 1205function, when checking or setting a password. Bugzilla does not use 1206C<crypt>. 1207 1208=begin undocumented 1209 1210Random salts are generated because the alternative is usually 1211to use the first two characters of the password itself, and since 1212the salt appears in plaintext at the beginning of the encrypted 1213password string this has the effect of revealing the first two 1214characters of the password to anyone who views the encrypted version. 1215 1216=end undocumented 1217 1218=item C<generate_random_password($password_length)> 1219 1220Returns an alphanumeric string with the specified length 1221(10 characters by default). Use this function to generate passwords 1222and tokens. 1223 1224=back 1225 1226=head2 Validation 1227 1228=over 4 1229 1230=item C<validate_email_syntax($email)> 1231 1232Do a syntax checking for a legal email address and returns 1 if 1233the check is successful, else returns 0. 1234Untaints C<$email> if successful. 1235 1236=item C<check_email_syntax($email)> 1237 1238Do a syntax checking for a legal email address and throws an error 1239if the check fails. 1240Untaints C<$email> if successful. 1241 1242=item C<validate_date($date)> 1243 1244Make sure the date has the correct format and returns 1 if 1245the check is successful, else returns 0. 1246 1247=back 1248 1249=head2 Database 1250 1251=over 1252 1253=item C<on_main_db> 1254 1255Runs a block of code always on the main DB. Useful for when you're inside 1256a subroutine and need to do some writes to the database, but don't know 1257if Bugzilla is currently using the shadowdb or not. Used like: 1258 1259 on_main_db { 1260 my $dbh = Bugzilla->dbh; 1261 $dbh->do("INSERT ..."); 1262 } 1263 1264=back 1265 1266=head1 B<Methods in need of POD> 1267 1268=over 1269 1270=item do_ssl_redirect_if_required 1271 1272=item validate_time 1273 1274=item is_ipv4 1275 1276=item is_ipv6 1277 1278=item display_value 1279 1280=back 1281