1# This Source Code Form is subject to the terms of the Mozilla Public 2# License, v. 2.0. If a copy of the MPL was not distributed with this 3# file, You can obtain one at http://mozilla.org/MPL/2.0/. 4# 5# This Source Code Form is "Incompatible With Secondary Licenses", as 6# defined by the Mozilla Public License, v. 2.0. 7 8package Bugzilla::Util; 9 10use strict; 11 12use base qw(Exporter); 13@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed 14 html_quote url_quote xml_quote 15 css_class_quote html_light_quote 16 i_am_cgi i_am_webservice correct_urlbase remote_ip 17 validate_ip do_ssl_redirect_if_required use_attachbase 18 diff_arrays on_main_db say 19 trim wrap_hard wrap_comment find_wrap_point 20 format_time validate_date validate_time datetime_from 21 is_7bit_clean bz_crypt generate_random_password 22 validate_email_syntax check_email_syntax clean_text 23 get_text template_var disable_utf8 24 detect_encoding 25 join_activity_entries); 26 27use Bugzilla::Constants; 28use Bugzilla::RNG qw(irand); 29use Bugzilla::Error; 30 31use Date::Parse; 32use Date::Format; 33use Digest; 34use Email::Address; 35use List::Util qw(first); 36use Scalar::Util qw(tainted blessed); 37use Template::Filters; 38use Text::Wrap; 39use Encode qw(encode decode resolve_alias); 40use Encode::Guess; 41 42sub trick_taint { 43 require Carp; 44 Carp::confess("Undef to trick_taint") unless defined $_[0]; 45 my $match = $_[0] =~ /^(.*)$/s; 46 $_[0] = $match ? $1 : undef; 47 return (defined($_[0])); 48} 49 50sub detaint_natural { 51 my $match = $_[0] =~ /^(\d+)$/; 52 $_[0] = $match ? int($1) : undef; 53 return (defined($_[0])); 54} 55 56sub detaint_signed { 57 my $match = $_[0] =~ /^([-+]?\d+)$/; 58 # The "int()" call removes any leading plus sign. 59 $_[0] = $match ? int($1) : undef; 60 return (defined($_[0])); 61} 62 63# Bug 120030: Override html filter to obscure the '@' in user 64# visible strings. 65# Bug 319331: Handle BiDi disruptions. 66sub html_quote { 67 my ($var) = Template::Filters::html_filter(@_); 68 # Obscure '@'. 69 $var =~ s/\@/\@/g; 70 if (Bugzilla->params->{'utf8'}) { 71 # Remove control characters if the encoding is utf8. 72 # Other multibyte encodings may be using this range; so ignore if not utf8. 73 $var =~ s/(?![\t\r\n])[[:cntrl:]]//g; 74 75 # Remove the following characters because they're 76 # influencing BiDi: 77 # -------------------------------------------------------- 78 # |Code |Name |UTF-8 representation| 79 # |------|--------------------------|--------------------| 80 # |U+202a|Left-To-Right Embedding |0xe2 0x80 0xaa | 81 # |U+202b|Right-To-Left Embedding |0xe2 0x80 0xab | 82 # |U+202c|Pop Directional Formatting|0xe2 0x80 0xac | 83 # |U+202d|Left-To-Right Override |0xe2 0x80 0xad | 84 # |U+202e|Right-To-Left Override |0xe2 0x80 0xae | 85 # -------------------------------------------------------- 86 # 87 # The following are characters influencing BiDi, too, but 88 # they can be spared from filtering because they don't 89 # influence more than one character right or left: 90 # -------------------------------------------------------- 91 # |Code |Name |UTF-8 representation| 92 # |------|--------------------------|--------------------| 93 # |U+200e|Left-To-Right Mark |0xe2 0x80 0x8e | 94 # |U+200f|Right-To-Left Mark |0xe2 0x80 0x8f | 95 # -------------------------------------------------------- 96 $var =~ s/[\x{202a}-\x{202e}]//g; 97 } 98 return $var; 99} 100 101sub html_light_quote { 102 my ($text) = @_; 103 # admin/table.html.tmpl calls |FILTER html_light| many times. 104 # There is no need to recreate the HTML::Scrubber object again and again. 105 my $scrubber = Bugzilla->process_cache->{html_scrubber}; 106 107 # List of allowed HTML elements having no attributes. 108 my @allow = qw(b strong em i u p br abbr acronym ins del cite code var 109 dfn samp kbd big small sub sup tt dd dt dl ul li ol 110 fieldset legend); 111 112 if (!Bugzilla->feature('html_desc')) { 113 my $safe = join('|', @allow); 114 my $chr = chr(1); 115 116 # First, escape safe elements. 117 $text =~ s#<($safe)>#$chr$1$chr#go; 118 $text =~ s#</($safe)>#$chr/$1$chr#go; 119 # Now filter < and >. 120 $text =~ s#<#<#g; 121 $text =~ s#>#>#g; 122 # Restore safe elements. 123 $text =~ s#$chr/($safe)$chr#</$1>#go; 124 $text =~ s#$chr($safe)$chr#<$1>#go; 125 return $text; 126 } 127 elsif (!$scrubber) { 128 # We can be less restrictive. We can accept elements with attributes. 129 push(@allow, qw(a blockquote q span)); 130 131 # Allowed protocols. 132 my $safe_protocols = join('|', SAFE_PROTOCOLS); 133 my $protocol_regexp = qr{(^(?:$safe_protocols):|^[^:]+$)}i; 134 135 # Deny all elements and attributes unless explicitly authorized. 136 my @default = (0 => { 137 id => 1, 138 name => 1, 139 class => 1, 140 '*' => 0, # Reject all other attributes. 141 } 142 ); 143 144 # Specific rules for allowed elements. If no specific rule is set 145 # for a given element, then the default is used. 146 my @rules = (a => { 147 href => $protocol_regexp, 148 target => qr{^(?:_blank|_parent|_self|_top)$}i, 149 title => 1, 150 id => 1, 151 name => 1, 152 class => 1, 153 '*' => 0, # Reject all other attributes. 154 }, 155 blockquote => { 156 cite => $protocol_regexp, 157 id => 1, 158 name => 1, 159 class => 1, 160 '*' => 0, # Reject all other attributes. 161 }, 162 'q' => { 163 cite => $protocol_regexp, 164 id => 1, 165 name => 1, 166 class => 1, 167 '*' => 0, # Reject all other attributes. 168 }, 169 ); 170 171 Bugzilla->process_cache->{html_scrubber} = $scrubber = 172 HTML::Scrubber->new(default => \@default, 173 allow => \@allow, 174 rules => \@rules, 175 comment => 0, 176 process => 0); 177 } 178 return $scrubber->scrub($text); 179} 180 181sub email_filter { 182 my ($toencode) = @_; 183 if (!Bugzilla->user->id) { 184 my @emails = Email::Address->parse($toencode); 185 if (scalar @emails) { 186 my @hosts = map { quotemeta($_->host) } @emails; 187 my $hosts_re = join('|', @hosts); 188 $toencode =~ s/\@(?:$hosts_re)//g; 189 return $toencode; 190 } 191 } 192 return $toencode; 193} 194 195# This originally came from CGI.pm, by Lincoln D. Stein 196sub url_quote { 197 my ($toencode) = (@_); 198 utf8::encode($toencode) # The below regex works only on bytes 199 if Bugzilla->params->{'utf8'} && utf8::is_utf8($toencode); 200 $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg; 201 return $toencode; 202} 203 204sub css_class_quote { 205 my ($toencode) = (@_); 206 $toencode =~ s#[ /]#_#g; 207 $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("&#x%x;",ord($1))/eg; 208 return $toencode; 209} 210 211sub xml_quote { 212 my ($var) = (@_); 213 $var =~ s/\&/\&/g; 214 $var =~ s/</\</g; 215 $var =~ s/>/\>/g; 216 $var =~ s/\"/\"/g; 217 $var =~ s/\'/\'/g; 218 219 # the following nukes characters disallowed by the XML 1.0 220 # spec, Production 2.2. 1.0 declares that only the following 221 # are valid: 222 # (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]) 223 $var =~ s/([\x{0001}-\x{0008}]| 224 [\x{000B}-\x{000C}]| 225 [\x{000E}-\x{001F}]| 226 [\x{D800}-\x{DFFF}]| 227 [\x{FFFE}-\x{FFFF}])//gx; 228 return $var; 229} 230 231sub i_am_cgi { 232 # I use SERVER_SOFTWARE because it's required to be 233 # defined for all requests in the CGI spec. 234 return exists $ENV{'SERVER_SOFTWARE'} ? 1 : 0; 235} 236 237sub i_am_webservice { 238 my $usage_mode = Bugzilla->usage_mode; 239 return $usage_mode == USAGE_MODE_XMLRPC 240 || $usage_mode == USAGE_MODE_JSON; 241} 242 243# This exists as a separate function from Bugzilla::CGI::redirect_to_https 244# because we don't want to create a CGI object during XML-RPC calls 245# (doing so can mess up XML-RPC). 246sub do_ssl_redirect_if_required { 247 return if !i_am_cgi(); 248 return if !Bugzilla->params->{'ssl_redirect'}; 249 250 my $sslbase = Bugzilla->params->{'sslbase'}; 251 252 # If we're already running under SSL, never redirect. 253 return if uc($ENV{HTTPS} || '') eq 'ON'; 254 # Never redirect if there isn't an sslbase. 255 return if !$sslbase; 256 Bugzilla->cgi->redirect_to_https(); 257} 258 259sub correct_urlbase { 260 my $ssl = Bugzilla->params->{'ssl_redirect'}; 261 my $urlbase = Bugzilla->params->{'urlbase'}; 262 my $sslbase = Bugzilla->params->{'sslbase'}; 263 264 if (!$sslbase) { 265 return $urlbase; 266 } 267 elsif ($ssl) { 268 return $sslbase; 269 } 270 else { 271 # Return what the user currently uses. 272 return (uc($ENV{HTTPS} || '') eq 'ON') ? $sslbase : $urlbase; 273 } 274} 275 276sub remote_ip { 277 my $ip = $ENV{'REMOTE_ADDR'} || '127.0.0.1'; 278 my @proxies = split(/[\s,]+/, Bugzilla->params->{'inbound_proxies'}); 279 280 # If the IP address is one of our trusted proxies, then we look at 281 # the X-Forwarded-For header to determine the real remote IP address. 282 if ($ENV{'HTTP_X_FORWARDED_FOR'} && first { $_ eq $ip } @proxies) { 283 my @ips = split(/[\s,]+/, $ENV{'HTTP_X_FORWARDED_FOR'}); 284 # This header can contain several IP addresses. We want the 285 # IP address of the machine which connected to our proxies as 286 # all other IP addresses may be fake or internal ones. 287 # Note that this may block a whole external proxy, but we have 288 # no way to determine if this proxy is malicious or trustable. 289 foreach my $remote_ip (reverse @ips) { 290 if (!first { $_ eq $remote_ip } @proxies) { 291 # Keep the original IP address if the remote IP is invalid. 292 $ip = validate_ip($remote_ip) || $ip; 293 last; 294 } 295 } 296 } 297 return $ip; 298} 299 300sub validate_ip { 301 my $ip = shift; 302 return is_ipv4($ip) || is_ipv6($ip); 303} 304 305# Copied from Data::Validate::IP::is_ipv4(). 306sub is_ipv4 { 307 my $ip = shift; 308 return unless defined $ip; 309 310 my @octets = $ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/; 311 return unless scalar(@octets) == 4; 312 313 foreach my $octet (@octets) { 314 return unless ($octet >= 0 && $octet <= 255 && $octet !~ /^0\d{1,2}$/); 315 } 316 317 # The IP address is valid and can now be detainted. 318 return join('.', @octets); 319} 320 321# Copied from Data::Validate::IP::is_ipv6(). 322sub is_ipv6 { 323 my $ip = shift; 324 return unless defined $ip; 325 326 # If there is a :: then there must be only one :: and the length 327 # can be variable. Without it, the length must be 8 groups. 328 my @chunks = split(':', $ip); 329 330 # Need to check if the last chunk is an IPv4 address, if it is we 331 # pop it off and exempt it from the normal IPv6 checking and stick 332 # it back on at the end. If there is only one chunk and it's an IPv4 333 # address, then it isn't an IPv6 address. 334 my $ipv4; 335 my $expected_chunks = 8; 336 if (@chunks > 1 && is_ipv4($chunks[$#chunks])) { 337 $ipv4 = pop(@chunks); 338 $expected_chunks--; 339 } 340 341 my $empty = 0; 342 # Workaround to handle trailing :: being valid. 343 if ($ip =~ /[0-9a-f]{1,4}::$/) { 344 $empty++; 345 # Single trailing ':' is invalid. 346 } elsif ($ip =~ /:$/) { 347 return; 348 } 349 350 foreach my $chunk (@chunks) { 351 return unless $chunk =~ /^[0-9a-f]{0,4}$/i; 352 $empty++ if $chunk eq ''; 353 } 354 # More than one :: block is bad, but if it starts with :: it will 355 # look like two, so we need an exception. 356 if ($empty == 2 && $ip =~ /^::/) { 357 # This is ok 358 } elsif ($empty > 1) { 359 return; 360 } 361 362 push(@chunks, $ipv4) if $ipv4; 363 # Need 8 chunks, or we need an empty section that could be filled 364 # to represent the missing '0' sections. 365 return unless (@chunks == $expected_chunks || @chunks < $expected_chunks && $empty); 366 367 my $ipv6 = join(':', @chunks); 368 # The IP address is valid and can now be detainted. 369 trick_taint($ipv6); 370 371 # Need to handle the exception of trailing :: being valid. 372 return "${ipv6}::" if $ip =~ /::$/; 373 return $ipv6; 374} 375 376sub use_attachbase { 377 my $attachbase = Bugzilla->params->{'attachment_base'}; 378 return ($attachbase ne '' 379 && $attachbase ne Bugzilla->params->{'urlbase'} 380 && $attachbase ne Bugzilla->params->{'sslbase'}) ? 1 : 0; 381} 382 383sub diff_arrays { 384 my ($old_ref, $new_ref, $attrib) = @_; 385 $attrib ||= 'name'; 386 387 my (%counts, %pos); 388 # We are going to alter the old array. 389 my @old = @$old_ref; 390 my $i = 0; 391 392 # $counts{foo}-- means old, $counts{foo}++ means new. 393 # If $counts{foo} becomes positive, then we are adding new items, 394 # else we simply cancel one old existing item. Remaining items 395 # in the old list have been removed. 396 foreach (@old) { 397 next unless defined $_; 398 my $value = blessed($_) ? $_->$attrib : $_; 399 $counts{$value}--; 400 push @{$pos{$value}}, $i++; 401 } 402 my @added; 403 foreach (@$new_ref) { 404 next unless defined $_; 405 my $value = blessed($_) ? $_->$attrib : $_; 406 if (++$counts{$value} > 0) { 407 # Ignore empty strings, but objects having an empty string 408 # as attribute are fine. 409 push(@added, $_) unless ($value eq '' && !blessed($_)); 410 } 411 else { 412 my $old_pos = shift @{$pos{$value}}; 413 $old[$old_pos] = undef; 414 } 415 } 416 # Ignore canceled items as well as empty strings. 417 my @removed = grep { defined $_ && $_ ne '' } @old; 418 return (\@removed, \@added); 419} 420 421# XXX - This is a temporary subroutine till we require Perl 5.10.1. 422# This will happen before Bugzilla 5.0rc1. 423sub say (@) { 424 print @_; 425 print "\n"; 426} 427 428sub trim { 429 my ($str) = @_; 430 if ($str) { 431 $str =~ s/^\s+//g; 432 $str =~ s/\s+$//g; 433 } 434 return $str; 435} 436 437sub wrap_comment { 438 my ($comment, $cols) = @_; 439 my $wrappedcomment = ""; 440 441 # Use 'local', as recommended by Text::Wrap's perldoc. 442 local $Text::Wrap::columns = $cols || COMMENT_COLS; 443 # Make words that are longer than COMMENT_COLS not wrap. 444 local $Text::Wrap::huge = 'overflow'; 445 # Don't mess with tabs. 446 local $Text::Wrap::unexpand = 0; 447 448 # If the line starts with ">", don't wrap it. Otherwise, wrap. 449 foreach my $line (split(/\r\n|\r|\n/, $comment)) { 450 if ($line =~ qr/^>/) { 451 $wrappedcomment .= ($line . "\n"); 452 } 453 else { 454 # Due to a segfault in Text::Tabs::expand() when processing tabs with 455 # Unicode (see http://rt.perl.org/rt3/Public/Bug/Display.html?id=52104), 456 # we have to remove tabs before processing the comment. This restriction 457 # can go away when we require Perl 5.8.9 or newer. 458 $line =~ s/\t/ /g; 459 $wrappedcomment .= (wrap('', '', $line) . "\n"); 460 } 461 } 462 463 chomp($wrappedcomment); # Text::Wrap adds an extra newline at the end. 464 return $wrappedcomment; 465} 466 467sub find_wrap_point { 468 my ($string, $maxpos) = @_; 469 if (!$string) { return 0 } 470 if (length($string) < $maxpos) { return length($string) } 471 my $wrappoint = rindex($string, ",", $maxpos); # look for comma 472 if ($wrappoint <= 0) { # can't find comma 473 $wrappoint = rindex($string, " ", $maxpos); # look for space 474 if ($wrappoint <= 0) { # can't find space 475 $wrappoint = rindex($string, "-", $maxpos); # look for hyphen 476 if ($wrappoint <= 0) { # can't find hyphen 477 $wrappoint = $maxpos; # just truncate it 478 } else { 479 $wrappoint++; # leave hyphen on the left side 480 } 481 } 482 } 483 return $wrappoint; 484} 485 486sub join_activity_entries { 487 my ($field, $current_change, $new_change) = @_; 488 # We need to insert characters as these were removed by old 489 # LogActivityEntry code. 490 491 return $new_change if $current_change eq ''; 492 493 # Buglists and see_also need the comma restored 494 if ($field eq 'dependson' || $field eq 'blocked' || $field eq 'see_also') { 495 if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') { 496 return $current_change . $new_change; 497 } else { 498 return $current_change . ', ' . $new_change; 499 } 500 } 501 502 # Assume bug_file_loc contain a single url, don't insert a delimiter 503 if ($field eq 'bug_file_loc') { 504 return $current_change . $new_change; 505 } 506 507 # All other fields get a space unless the first character of the second 508 # string is a comma or space 509 if (substr($new_change, 0, 1) eq ',' || substr($new_change, 0, 1) eq ' ') { 510 return $current_change . $new_change; 511 } else { 512 return $current_change . ' ' . $new_change; 513 } 514} 515 516sub wrap_hard { 517 my ($string, $columns) = @_; 518 local $Text::Wrap::columns = $columns; 519 local $Text::Wrap::unexpand = 0; 520 local $Text::Wrap::huge = 'wrap'; 521 522 my $wrapped = wrap('', '', $string); 523 chomp($wrapped); 524 return $wrapped; 525} 526 527sub format_time { 528 my ($date, $format, $timezone) = @_; 529 530 # If $format is not set, try to guess the correct date format. 531 if (!$format) { 532 if (!ref $date 533 && $date =~ /^(\d{4})[-\.](\d{2})[-\.](\d{2}) (\d{2}):(\d{2})(:(\d{2}))?$/) 534 { 535 my $sec = $7; 536 if (defined $sec) { 537 $format = "%Y-%m-%d %T %Z"; 538 } else { 539 $format = "%Y-%m-%d %R %Z"; 540 } 541 } else { 542 # Default date format. See DateTime for other formats available. 543 $format = "%Y-%m-%d %R %Z"; 544 } 545 } 546 547 my $dt = ref $date ? $date : datetime_from($date, $timezone); 548 $date = defined $dt ? $dt->strftime($format) : ''; 549 return trim($date); 550} 551 552sub datetime_from { 553 my ($date, $timezone) = @_; 554 555 # In the database, this is the "0" date. 556 return undef if $date =~ /^0000/; 557 558 # strptime($date) returns an empty array if $date has an invalid 559 # date format. 560 my @time = strptime($date); 561 562 unless (scalar @time) { 563 # If an unknown timezone is passed (such as MSK, for Moskow), 564 # strptime() is unable to parse the date. We try again, but we first 565 # remove the timezone. 566 $date =~ s/\s+\S+$//; 567 @time = strptime($date); 568 } 569 570 return undef if !@time; 571 572 # strptime() counts years from 1900, except if they are older than 1901 573 # in which case it returns the full year (so 1890 -> 1890, but 1984 -> 84, 574 # and 3790 -> 1890). We make a guess and assume that 1100 <= year < 3000. 575 $time[5] += 1900 if $time[5] < 1100; 576 577 my %args = ( 578 year => $time[5], 579 # Months start from 0 (January). 580 month => $time[4] + 1, 581 day => $time[3], 582 hour => $time[2], 583 minute => $time[1], 584 # DateTime doesn't like fractional seconds. 585 # Also, sometimes seconds are undef. 586 second => defined($time[0]) ? int($time[0]) : undef, 587 # If a timezone was specified, use it. Otherwise, use the 588 # local timezone. 589 time_zone => Bugzilla->local_timezone->offset_as_string($time[6]) 590 || Bugzilla->local_timezone, 591 ); 592 593 # If something wasn't specified in the date, it's best to just not 594 # pass it to DateTime at all. (This is important for doing datetime_from 595 # on the deadline field, which is usually just a date with no time.) 596 foreach my $arg (keys %args) { 597 delete $args{$arg} if !defined $args{$arg}; 598 } 599 600 # This module takes time to load and is only used here, so we 601 # |require| it here rather than |use| it. 602 require DateTime; 603 my $dt = new DateTime(\%args); 604 605 # Now display the date using the given timezone, 606 # or the user's timezone if none is given. 607 $dt->set_time_zone($timezone || Bugzilla->user->timezone); 608 return $dt; 609} 610 611sub bz_crypt { 612 my ($password, $salt) = @_; 613 614 my $algorithm; 615 if (!defined $salt) { 616 # If you don't use a salt, then people can create tables of 617 # hashes that map to particular passwords, and then break your 618 # hashing very easily if they have a large-enough table of common 619 # (or even uncommon) passwords. So we generate a unique salt for 620 # each password in the database, and then just prepend it to 621 # the hash. 622 $salt = generate_random_password(PASSWORD_SALT_LENGTH); 623 $algorithm = PASSWORD_DIGEST_ALGORITHM; 624 } 625 626 # We append the algorithm used to the string. This is good because then 627 # we can change the algorithm being used, in the future, without 628 # disrupting the validation of existing passwords. Also, this tells 629 # us if a password is using the old "crypt" method of hashing passwords, 630 # because the algorithm will be missing from the string. 631 if ($salt =~ /{([^}]+)}$/) { 632 $algorithm = $1; 633 } 634 635 # Wide characters cause crypt and Digest to die. 636 if (Bugzilla->params->{'utf8'}) { 637 utf8::encode($password) if utf8::is_utf8($password); 638 } 639 640 my $crypted_password; 641 if (!$algorithm) { 642 # Crypt the password. 643 $crypted_password = crypt($password, $salt); 644 645 # HACK: Perl has bug where returned crypted password is considered 646 # tainted. See http://rt.perl.org/rt3/Public/Bug/Display.html?id=59998 647 unless(tainted($password) || tainted($salt)) { 648 trick_taint($crypted_password); 649 } 650 } 651 else { 652 my $hasher = Digest->new($algorithm); 653 # We only want to use the first characters of the salt, no 654 # matter how long of a salt we may have been passed. 655 $salt = substr($salt, 0, PASSWORD_SALT_LENGTH); 656 $hasher->add($password, $salt); 657 $crypted_password = $salt . $hasher->b64digest . "{$algorithm}"; 658 } 659 660 # Return the crypted password. 661 return $crypted_password; 662} 663 664# If you want to understand the security of strings generated by this 665# function, here's a quick formula that will help you estimate: 666# We pick from 62 characters, which is close to 64, which is 2^6. 667# So 8 characters is (2^6)^8 == 2^48 combinations. Just multiply 6 668# by the number of characters you generate, and that gets you the equivalent 669# strength of the string in bits. 670sub generate_random_password { 671 my $size = shift || 10; # default to 10 chars if nothing specified 672 return join("", map{ ('0'..'9','a'..'z','A'..'Z')[irand 62] } (1..$size)); 673} 674 675sub validate_email_syntax { 676 my ($addr) = @_; 677 my $match = Bugzilla->params->{'emailregexp'}; 678 my $email = $addr . Bugzilla->params->{'emailsuffix'}; 679 # This regexp follows RFC 2822 section 3.4.1. 680 my $addr_spec = $Email::Address::addr_spec; 681 # RFC 2822 section 2.1 specifies that email addresses must 682 # be made of US-ASCII characters only. 683 # Email::Address::addr_spec doesn't enforce this. 684 # We set the max length to 127 to ensure addresses aren't truncated when 685 # inserted into the tokens.eventdata field. 686 if ($addr =~ /$match/ 687 && $email !~ /\P{ASCII}/ 688 && $email =~ /^$addr_spec$/ 689 && length($email) <= 127) 690 { 691 # We assume these checks to suffice to consider the address untainted. 692 trick_taint($_[0]); 693 return 1; 694 } 695 return 0; 696} 697 698sub check_email_syntax { 699 my ($addr) = @_; 700 701 unless (validate_email_syntax(@_)) { 702 my $email = $addr . Bugzilla->params->{'emailsuffix'}; 703 ThrowUserError('illegal_email_address', { addr => $email }); 704 } 705} 706 707sub validate_date { 708 my ($date) = @_; 709 my $date2; 710 711 # $ts is undefined if the parser fails. 712 my $ts = str2time($date); 713 if ($ts) { 714 $date2 = time2str("%Y-%m-%d", $ts); 715 716 $date =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/; 717 $date2 =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/; 718 } 719 my $ret = ($ts && $date eq $date2); 720 return $ret ? 1 : 0; 721} 722 723sub validate_time { 724 my ($time) = @_; 725 my $time2; 726 727 # $ts is undefined if the parser fails. 728 my $ts = str2time($time); 729 if ($ts) { 730 $time2 = time2str("%H:%M:%S", $ts); 731 if ($time =~ /^(\d{1,2}):(\d\d)(?::(\d\d))?$/) { 732 $time = sprintf("%02d:%02d:%02d", $1, $2, $3 || 0); 733 } 734 } 735 my $ret = ($ts && $time eq $time2); 736 return $ret ? 1 : 0; 737} 738 739sub is_7bit_clean { 740 return $_[0] !~ /[^\x20-\x7E\x0A\x0D]/; 741} 742 743sub clean_text { 744 my $dtext = shift; 745 if ($dtext) { 746 # change control characters into a space 747 $dtext =~ s/[\x00-\x1F\x7F]+/ /g; 748 } 749 return trim($dtext); 750} 751 752sub on_main_db (&) { 753 my $code = shift; 754 my $original_dbh = Bugzilla->dbh; 755 Bugzilla->request_cache->{dbh} = Bugzilla->dbh_main; 756 $code->(); 757 Bugzilla->request_cache->{dbh} = $original_dbh; 758} 759 760sub get_text { 761 my ($name, $vars) = @_; 762 my $template = Bugzilla->template_inner; 763 $vars ||= {}; 764 $vars->{'message'} = $name; 765 my $message; 766 $template->process('global/message.txt.tmpl', $vars, \$message) 767 || ThrowTemplateError($template->error()); 768 769 # Remove the indenting that exists in messages.html.tmpl. 770 $message =~ s/^ //gm; 771 return $message; 772} 773 774sub template_var { 775 my $name = shift; 776 my $cache = Bugzilla->request_cache->{util_template_var} ||= {}; 777 my $template = Bugzilla->template_inner; 778 my $lang = $template->context->{bz_language}; 779 return $cache->{$lang}->{$name} if defined $cache->{$lang}; 780 my %vars; 781 # Note: If we suddenly start needing a lot of template_var variables, 782 # they should move into their own template, not field-descs. 783 $template->process('global/field-descs.none.tmpl', 784 { vars => \%vars, in_template_var => 1 }) 785 || ThrowTemplateError($template->error()); 786 787 $cache->{$lang} = \%vars; 788 return $vars{$name}; 789} 790 791sub display_value { 792 my ($field, $value) = @_; 793 my $value_descs = template_var('value_descs'); 794 if (defined $value_descs->{$field}->{$value}) { 795 return $value_descs->{$field}->{$value}; 796 } 797 return $value; 798} 799 800sub disable_utf8 { 801 if (Bugzilla->params->{'utf8'}) { 802 binmode STDOUT, ':bytes'; # Turn off UTF8 encoding. 803 } 804} 805 806use constant UTF8_ACCIDENTAL => qw(shiftjis big5-eten euc-kr euc-jp); 807 808sub detect_encoding { 809 my $data = shift; 810 811 Bugzilla->feature('detect_charset') 812 || ThrowUserError('feature_disabled', { feature => 'detect_charset' }); 813 814 require Encode::Detect::Detector; 815 import Encode::Detect::Detector 'detect'; 816 817 my $encoding = detect($data); 818 $encoding = resolve_alias($encoding) if $encoding; 819 820 # Encode::Detect is bad at detecting certain charsets, but Encode::Guess 821 # is better at them. Here's the details: 822 823 # shiftjis, big5-eten, euc-kr, and euc-jp: (Encode::Detect 824 # tends to accidentally mis-detect UTF-8 strings as being 825 # these encodings.) 826 if ($encoding && grep($_ eq $encoding, UTF8_ACCIDENTAL)) { 827 $encoding = undef; 828 my $decoder = guess_encoding($data, UTF8_ACCIDENTAL); 829 $encoding = $decoder->name if ref $decoder; 830 } 831 832 # Encode::Detect sometimes mis-detects various ISO encodings as iso-8859-8, 833 # or cp1255, but Encode::Guess can usually tell which one it is. 834 if ($encoding && ($encoding eq 'iso-8859-8' || $encoding eq 'cp1255')) { 835 my $decoded_as = _guess_iso($data, 'iso-8859-8', 836 # These are ordered this way because it gives the most 837 # accurate results. 838 qw(cp1252 iso-8859-7 iso-8859-2)); 839 $encoding = $decoded_as if $decoded_as; 840 } 841 842 return $encoding; 843} 844 845# A helper for detect_encoding. 846sub _guess_iso { 847 my ($data, $versus, @isos) = (shift, shift, shift); 848 849 my $encoding; 850 foreach my $iso (@isos) { 851 my $decoder = guess_encoding($data, ($iso, $versus)); 852 if (ref $decoder) { 853 $encoding = $decoder->name if ref $decoder; 854 last; 855 } 856 } 857 return $encoding; 858} 859 8601; 861 862__END__ 863 864=head1 NAME 865 866Bugzilla::Util - Generic utility functions for bugzilla 867 868=head1 SYNOPSIS 869 870 use Bugzilla::Util; 871 872 # Functions for dealing with variable tainting 873 trick_taint($var); 874 detaint_natural($var); 875 detaint_signed($var); 876 877 # Functions for quoting 878 html_quote($var); 879 url_quote($var); 880 xml_quote($var); 881 email_filter($var); 882 883 # Functions that tell you about your environment 884 my $is_cgi = i_am_cgi(); 885 my $is_webservice = i_am_webservice(); 886 my $urlbase = correct_urlbase(); 887 888 # Data manipulation 889 ($removed, $added) = diff_arrays(\@old, \@new); 890 891 # Functions for manipulating strings 892 $val = trim(" abc "); 893 $wrapped = wrap_comment($comment); 894 895 # Functions for formatting time 896 format_time($time); 897 datetime_from($time, $timezone); 898 899 # Cryptographic Functions 900 $crypted_password = bz_crypt($password); 901 $new_password = generate_random_password($password_length); 902 903 # Validation Functions 904 validate_email_syntax($email); 905 check_email_syntax($email); 906 validate_date($date); 907 908 # DB-related functions 909 on_main_db { 910 ... code here ... 911 }; 912 913=head1 DESCRIPTION 914 915This package contains various utility functions which do not belong anywhere 916else. 917 918B<It is not intended as a general dumping group for something which 919people feel might be useful somewhere, someday>. Do not add methods to this 920package unless it is intended to be used for a significant number of files, 921and it does not belong anywhere else. 922 923=head1 FUNCTIONS 924 925This package provides several types of routines: 926 927=head2 Tainting 928 929Several functions are available to deal with tainted variables. B<Use these 930with care> to avoid security holes. 931 932=over 4 933 934=item C<trick_taint($val)> 935 936Tricks perl into untainting a particular variable. 937 938Use trick_taint() when you know that there is no way that the data 939in a scalar can be tainted, but taint mode still bails on it. 940 941B<WARNING!! Using this routine on data that really could be tainted defeats 942the purpose of taint mode. It should only be used on variables that have been 943sanity checked in some way and have been determined to be OK.> 944 945=item C<detaint_natural($num)> 946 947This routine detaints a natural number. It returns a true value if the 948value passed in was a valid natural number, else it returns false. You 949B<MUST> check the result of this routine to avoid security holes. 950 951=item C<detaint_signed($num)> 952 953This routine detaints a signed integer. It returns a true value if the 954value passed in was a valid signed integer, else it returns false. You 955B<MUST> check the result of this routine to avoid security holes. 956 957=back 958 959=head2 Quoting 960 961Some values may need to be quoted from perl. However, this should in general 962be done in the template where possible. 963 964=over 4 965 966=item C<html_quote($val)> 967 968Returns a value quoted for use in HTML, with &, E<lt>, E<gt>, E<34> and @ being 969replaced with their appropriate HTML entities. Also, Unicode BiDi controls are 970deleted. 971 972=item C<html_light_quote($val)> 973 974Returns a string where only explicitly allowed HTML elements and attributes 975are kept. All HTML elements and attributes not being in the whitelist are either 976escaped (if HTML::Scrubber is not installed) or removed. 977 978=item C<url_quote($val)> 979 980Quotes characters so that they may be included as part of a url. 981 982=item C<css_class_quote($val)> 983 984Quotes characters so that they may be used as CSS class names. Spaces 985and forward slashes are replaced by underscores. 986 987=item C<xml_quote($val)> 988 989This is similar to C<html_quote>, except that ' is escaped to '. This 990is kept separate from html_quote partly for compatibility with previous code 991(for ') and partly for future handling of non-ASCII characters. 992 993=item C<email_filter> 994 995Removes the hostname from email addresses in the string, if the user 996currently viewing Bugzilla is logged out. If the user is logged-in, 997this filter just returns the input string. 998 999=back 1000 1001=head2 Environment and Location 1002 1003Functions returning information about your environment or location. 1004 1005=over 4 1006 1007=item C<i_am_cgi()> 1008 1009Tells you whether or not you are being run as a CGI script in a web 1010server. For example, it would return false if the caller is running 1011in a command-line script. 1012 1013=item C<i_am_webservice()> 1014 1015Tells you whether or not the current usage mode is WebServices related 1016such as JSONRPC or XMLRPC. 1017 1018=item C<correct_urlbase()> 1019 1020Returns either the C<sslbase> or C<urlbase> parameter, depending on the 1021current setting for the C<ssl_redirect> parameter. 1022 1023=item C<remote_ip()> 1024 1025Returns the IP address of the remote client. If Bugzilla is behind 1026a trusted proxy, it will get the remote IP address by looking at the 1027X-Forwarded-For header. 1028 1029=item C<validate_ip($ip)> 1030 1031Returns the sanitized IP address if it is a valid IPv4 or IPv6 address, 1032else returns undef. 1033 1034=item C<use_attachbase()> 1035 1036Returns true if an alternate host is used to display attachments; false 1037otherwise. 1038 1039=back 1040 1041=head2 Data Manipulation 1042 1043=over 4 1044 1045=item C<diff_arrays(\@old, \@new)> 1046 1047 Description: Takes two arrayrefs, and will tell you what it takes to 1048 get from @old to @new. 1049 Params: @old = array that you are changing from 1050 @new = array that you are changing to 1051 Returns: A list of two arrayrefs. The first is a reference to an 1052 array containing items that were removed from @old. The 1053 second is a reference to an array containing items 1054 that were added to @old. If both returned arrays are 1055 empty, @old and @new contain the same values. 1056 1057=back 1058 1059=head2 String Manipulation 1060 1061=over 4 1062 1063=item C<trim($str)> 1064 1065Removes any leading or trailing whitespace from a string. This routine does not 1066modify the existing string. 1067 1068=item C<wrap_hard($string, $size)> 1069 1070Wraps a string, so that a line is I<never> longer than C<$size>. 1071Returns the string, wrapped. 1072 1073=item C<wrap_comment($comment)> 1074 1075Takes a bug comment, and wraps it to the appropriate length. The length is 1076currently specified in C<Bugzilla::Constants::COMMENT_COLS>. Lines beginning 1077with ">" are assumed to be quotes, and they will not be wrapped. 1078 1079The intended use of this function is to wrap comments that are about to be 1080displayed or emailed. Generally, wrapped text should not be stored in the 1081database. 1082 1083=item C<find_wrap_point($string, $maxpos)> 1084 1085Search for a comma, a whitespace or a hyphen to split $string, within the first 1086$maxpos characters. If none of them is found, just split $string at $maxpos. 1087The search starts at $maxpos and goes back to the beginning of the string. 1088 1089=item C<join_activity_entries($field, $current_change, $new_change)> 1090 1091Joins two strings together so they appear as one. The field name is specified 1092as the method of joining the two strings depends on this. Returns the 1093combined string. 1094 1095=item C<is_7bit_clean($str)> 1096 1097Returns true is the string contains only 7-bit characters (ASCII 32 through 126, 1098ASCII 10 (LineFeed) and ASCII 13 (Carrage Return). 1099 1100=item C<disable_utf8()> 1101 1102Disable utf8 on STDOUT (and display raw data instead). 1103 1104=item C<detect_encoding($str)> 1105 1106Guesses what encoding a given data is encoded in, returning the canonical name 1107of the detected encoding (which may be different from the MIME charset 1108specification). 1109 1110=item C<clean_text($str)> 1111Returns the parameter "cleaned" by exchanging non-printable characters with spaces. 1112Specifically characters (ASCII 0 through 31) and (ASCII 127) will become ASCII 32 (Space). 1113 1114=item C<get_text> 1115 1116=over 1117 1118=item B<Description> 1119 1120This is a method of getting localized strings within Bugzilla code. 1121Use this when you don't want to display a whole template, you just 1122want a particular string. 1123 1124It uses the F<global/message.txt.tmpl> template to return a string. 1125 1126=item B<Params> 1127 1128=over 1129 1130=item C<$message> - The identifier for the message. 1131 1132=item C<$vars> - A hashref. Any variables you want to pass to the template. 1133 1134=back 1135 1136=item B<Returns> 1137 1138A string. 1139 1140=back 1141 1142 1143=item C<template_var> 1144 1145This is a method of getting the value of a variable from a template in 1146Perl code. The available variables are in the C<global/field-descs.none.tmpl> 1147template. Just pass in the name of the variable that you want the value of. 1148 1149 1150=back 1151 1152=head2 Formatting Time 1153 1154=over 4 1155 1156=item C<format_time($time)> 1157 1158Takes a time and converts it to the desired format and timezone. 1159If no format is given, the routine guesses the correct one and returns 1160an empty array if it cannot. If no timezone is given, the user's timezone 1161is used, as defined in his preferences. 1162 1163This routine is mainly called from templates to filter dates, see 1164"FILTER time" in L<Bugzilla::Template>. 1165 1166=item C<datetime_from($time, $timezone)> 1167 1168Returns a DateTime object given a date string. If the string is not in some 1169valid date format that C<strptime> understands, we return C<undef>. 1170 1171You can optionally specify a timezone for the returned date. If not 1172specified, defaults to the currently-logged-in user's timezone, or 1173the Bugzilla server's local timezone if there isn't a logged-in user. 1174 1175=back 1176 1177=head2 Cryptography 1178 1179=over 4 1180 1181=item C<bz_crypt($password, $salt)> 1182 1183Takes a string and returns a hashed (encrypted) value for it, using a 1184random salt. An optional salt string may also be passed in. 1185 1186Please always use this function instead of the built-in perl C<crypt> 1187function, when checking or setting a password. Bugzilla does not use 1188C<crypt>. 1189 1190=begin undocumented 1191 1192Random salts are generated because the alternative is usually 1193to use the first two characters of the password itself, and since 1194the salt appears in plaintext at the beginning of the encrypted 1195password string this has the effect of revealing the first two 1196characters of the password to anyone who views the encrypted version. 1197 1198=end undocumented 1199 1200=item C<generate_random_password($password_length)> 1201 1202Returns an alphanumeric string with the specified length 1203(10 characters by default). Use this function to generate passwords 1204and tokens. 1205 1206=back 1207 1208=head2 Validation 1209 1210=over 4 1211 1212=item C<validate_email_syntax($email)> 1213 1214Do a syntax checking for a legal email address and returns 1 if 1215the check is successful, else returns 0. 1216Untaints C<$email> if successful. 1217 1218=item C<check_email_syntax($email)> 1219 1220Do a syntax checking for a legal email address and throws an error 1221if the check fails. 1222Untaints C<$email> if successful. 1223 1224=item C<validate_date($date)> 1225 1226Make sure the date has the correct format and returns 1 if 1227the check is successful, else returns 0. 1228 1229=back 1230 1231=head2 Database 1232 1233=over 1234 1235=item C<on_main_db> 1236 1237Runs a block of code always on the main DB. Useful for when you're inside 1238a subroutine and need to do some writes to the database, but don't know 1239if Bugzilla is currently using the shadowdb or not. Used like: 1240 1241 on_main_db { 1242 my $dbh = Bugzilla->dbh; 1243 $dbh->do("INSERT ..."); 1244 } 1245 1246=back 1247