1package Bio::Graphics::Wiggle; 2 3=head1 NAME 4 5Bio::Graphics::Wiggle -- Binary storage for dense genomic features 6 7=head1 SYNOPSIS 8 9 # all positions are 1-based 10 11 my $wig = Bio::Graphics::Wiggle->new('./test.wig', 12 $writeable, 13 { seqid => $seqid, 14 start => $start, 15 step => $step, 16 min => $min, 17 max => $max }); 18 19 $wig->erase; 20 21 my $seqid = $wig->seqid('new_id'); 22 my $max = $wig->max($new_max); 23 my $min = $wig->min($new_min); 24 my $step = $wig->step($new_step); # data stored at modulus step == 0; all else is blank 25 26 $wig->set_value($position => $value); # store $value at position 27 $wig->set_values($position => \@values); # store array of values at position 28 $wig->set_range($start=>$end,$value); # store the same $value from $start to $end 29 30 my $value = $wig->value($position); # fetch value from position 31 my $values = $wig->values($start,$end); # fetch range of data from $start to $end 32 33 $wig->window(100); # sample window size 34 $wig->smoothing('mean'); # when sampling, compute the mean value across sample window 35 my $values = $wig->values($start,$end,$samples); # fetch $samples data points from $start to $end 36 37 38=head1 DESCRIPTION 39 40IMPORTANT NOTE: This implementation is still not right. See 41http://genomewiki.ucsc.edu/index.php/Wiggle for a more space-efficient 42implementation. 43 44This module stores "wiggle" style quantitative genome data for display 45in a genome browser application. The data for each chromosome (or 46contig, or other reference sequence) is stored in a single file in the 47following format: 48 49 256 byte header 50 50 bytes seqid, zero-terminated C string 51 4 byte long integer, value of "step" (explained later) 52 4 byte perl native float, the "min" value 53 4 byte perl native float, the "max" value 54 4 byte long integer, value of "span" 55 4 byte perl native float, the mean 56 4 byte perl native float, the standard deviation 57 2 byte unsigned short, the version number (currently version 0) 58 4 byte long integer, sequence start position (in 0-based coordinates) 59 null padding to 256 bytes for future use 60 61The remainder of the file consists of 8-bit unsigned scaled integer 62values. This means that all quantitative data will be scaled to 8-bit 63precision! 64 65For a convenient method of creating Wiggle files from UCSC-type WIG 66input and creating GFF3 output, please see 67L<Bio::Graphics::Wiggle::Loader>. 68 69=head1 METHODS 70 71=head2 Constructor and Accessors 72 73=over 4 74 75=item $wig = Bio::Graphics::Wiggle->new($filename,$writeable,{options}) 76 77Open/create a wiggle-format data file: 78 79 $filename -- path to the file to open/create 80 $writeable -- boolean value indicating whether file is 81 writeable. Missing files will only be created 82 if $writeable set to a true value. If path is 83 empty (undef or empty string) and writeable is true, 84 new() will create a temporary file that will be 85 deleted when the object goes out of scope. 86 {options} -- hash ref of the following named options, only valid 87 when creating a new wig file with $writeable true. 88 89 option name description default 90 ----------- ----- ------- 91 seqid name/id of sequence empty name 92 min minimum value of data points 0 93 max maximum value of data points 255 94 step interval between data points 1 95 span width of data points value of "step" 96 97The "step" can be used to create sparse files to save space. By 98default, step is set to 1, in which case a data value will be stored 99at each base of the sequence. By setting step to 10, then each value 100is taken to correspond to 10 bp, and the file will be 10x smaller. 101For example, consider this step 5 data set: 102 103 1 2 3 4 5 6 7 8 9 10 11 12 13 14 104 20 . . . . 60 . . . . 80 . . . 105 106We have stored the values "20" "60" and "80" at positions 1, 6 and 11, 107respectively. When retrieving this data, it will appear as if 108positions 1 through 5 have a value of 20, positions 6-10 have a value 109of 60, and positions 11-14 have a value of 80. In the data file, we 110store, positions 1,6,and 11 in adjacent bytes. 111 112Note that no locking is performed by this module. If you wish to allow 113multi-user write access to the databases files, you will need to 114flock() the files yourself. 115 116=item $seqid = $wig->seqid(['new_id']) 117 118=item $max = $wig->max([$new_max]) 119 120=item $min = $wig->min([$new_min]) 121 122=item $step = $wig->step([$new_step]) 123 124=item $span = $wig->span([$new_span]) 125 126=item $mean = $wig->mean([$new_mean]); 127 128=item $stdev = $wig->stdev([$new_stdev]); 129 130These accessors get or set the corresponding values. Setting is only 131allowed if the file was opened for writing. Note that changing the 132min, max and step after writing data to the file under another 133parameter set will produce unexpected (and invalid) results, as the 134existing data is not automatically updated to be consistent. 135 136=item $trim = $wig->trim([$new_trim]); 137 138The trim method sets the trimming method, which can be used to trim 139out extreme values. Three methods are currently supported: 140 141 none No trimming 142 stdev Trim 1 standard deviation above and below mean 143 stdevN Trim N standard deviations above and below the mean 144 145In "stdevN", any can be any positive integer. 146 147=back 148 149=head2 Setting Data 150 151=over 4 152 153=item $wig->set_value($position => $value) 154 155This method sets the value at $position to $value. If a step>1 is in 156force, then $position will be rounded down to the nearest multiple of 157step. 158 159=item $wig->set_range($start=>$end, $value) 160 161This method sets the value of all bases between $start and $end to 162$value, honoring step. 163 164=item $sig->set_values($position => \@values) 165 166This method writes an array of values into the datababase beginning at 167$position (or the nearest lower multiple of step). If step>1, then 168values will be written at step intervals. 169 170=back 171 172=head2 Retrieving Data 173 174=over 4 175 176=item $value = $wig->value($position) 177 178Retrieve the single data item at position $position, or the nearest 179lower multiple of $step if step>1. 180 181=item $values = $wig->values($start=>$end) 182 183Retrieve the values in the range $start to $end and return them as an 184array ref. Note that you will always get an array of size 185($end-$start+1) even if step>1; the data in between the step intervals 186will be filled in. 187 188=item $values = $wig->values($start=>$end,$samples) 189 190Retrieve a sampling of the values between $start and $end. Nothing 191very sophisticated is done here; the code simply returns the number of 192values indicated in $samples, smoothed according to the smoothing 193method selected (default to "mean"), then selected at even intervals 194from the range $start to $end. The return value is an arrayref of 195exactly $samples values. 196 197=item $string = $wig->export_to_wif($start,$end) 198 199=item $string = $wig->export_to_wif64($start,$end) 200 201Export the region from start to end in the "wif" format. This data can 202later be imported into another Bio::Graphics::Wiggle object. The first 203version returns a binary string. The second version returns a base64 204encoded version that is safe for ascii-oriented formata such as GFF3 205and XML. 206 207=item $wig->import_from_wif($string) 208 209=item $wig->import_from_wif64($string) 210 211Import a wif format data string into the Bio::Graphics::Wiggle 212object. The first version expects a binary string. The second version 213expects a base64 encoded version that is safe for ascii-oriented 214formata such as GFF3 and XML. 215 216=back 217 218 219=cut 220 221# read/write genome tiling data, to be compatible with Jim Kent's WIG format 222use strict; 223use warnings; 224use IO::File; 225use Carp 'croak','carp','confess'; 226 227use constant HEADER_LEN => 256; 228 # seqid, step, min, max, span, mean, stdev, version, start 229use constant HEADER => '(Z50LFFLFFSL)@'.HEADER_LEN; 230use constant BODY => 'C'; 231use constant DEBUG => 0; 232use constant DEFAULT_SMOOTHING => 'mean'; 233use constant VERSION => 0; 234our $VERSION = '1.0'; 235 236sub new { 237 my $class = shift; 238 my ($path,$write,$options) = @_; 239 $path ||= ''; # to avoid uninit warning 240 my $mode = $write ? -e $path # if file already exists... 241 ? '+<' # ...open for read/write 242 : '+>' # ...else clobber and open a new one 243 : '<'; # read only 244 my $fh = $class->new_fh($path,$mode); 245 $fh or die (($path||'temporary file').": $!"); 246 247 $options ||= {}; 248 249 my $self = bless {fh => $fh, 250 write => $write, 251 dirty => scalar keys %$options 252 }, ref $class || $class; 253 254 my $stored_options = eval {$self->_readoptions} || {}; 255 $options->{start}-- if defined $options->{start}; # 1-based ==> 0-based coordinates 256 my %merged_options = (%$stored_options,%$options); 257 # warn "merged options = ",join ' ',%merged_options; 258 $merged_options{version}||= 0; 259 $merged_options{seqid} ||= 'chrUnknown'; 260 $merged_options{min} ||= 0; 261 $merged_options{max} ||= 255; 262 $merged_options{mean} ||= 128; 263 $merged_options{stdev} ||= 255; 264 $merged_options{trim} ||= 'none'; 265 $merged_options{step} ||= 1; 266 $merged_options{start} ||= 0; 267 $merged_options{span} ||= $merged_options{step}; 268 $self->{options} = \%merged_options; 269 $self->_do_trim unless $self->trim eq 'none'; 270 return $self; 271} 272 273sub new_fh { 274 my $self = shift; 275 my ($path,$mode) = @_; 276 return $path ? IO::File->new($path,$mode) 277 : IO::File->new_tmpfile; 278} 279 280sub end { 281 my $self = shift; 282 unless (defined $self->{end}) { 283 my $size = (stat($self->fh))[7]; 284 my $data_len = $size - HEADER_LEN(); 285 return unless $data_len>0; # undef end 286 $self->{end} = ($self->start-1) + $data_len * $self->step; 287 } 288 return $self->{end}; 289} 290 291sub DESTROY { shift->write } 292 293sub erase { 294 my $self = shift; 295 $self->fh->truncate(HEADER_LEN); 296} 297 298sub fh { shift->{fh} } 299sub seek { shift->fh->seek(shift,0) } 300sub tell { shift->fh->tell() } 301 302sub _option { 303 my $self = shift; 304 my $option = shift; 305 my $d = $self->{options}{$option}; 306 if (@_) { 307 $self->{dirty}++; 308 $self->{options}{$option} = shift; 309 delete $self->{scale} if $option eq 'min' or $option eq 'max'; 310 } 311 return $d; 312} 313 314sub version { shift->_option('version',@_) } 315sub seqid { shift->_option('seqid',@_) } 316sub min { shift->_option('min',@_) } 317sub max { shift->_option('max',@_) } 318sub step { shift->_option('step',@_) } 319sub span { shift->_option('span',@_) } 320sub mean { shift->_option('mean',@_) } 321sub stdev { shift->_option('stdev',@_) } 322sub trim { shift->_option('trim',@_) } 323 324sub start { # slightly different because we have to deal with 1 vs 0-based coordinates 325 my $self = shift; 326 my $start = $self->_option('start'); 327 $start++; # convert into 1-based coordinates 328 if (@_) { 329 my $newstart = shift; 330 $self->_option('start',$newstart-1); # store in zero-based coordinates 331 } 332 return $start; 333} 334 335sub smoothing { 336 my $self = shift; 337 my $d = $self->{smoothing} || DEFAULT_SMOOTHING; 338 $self->{smoothing} = shift if @_; 339 $d; 340} 341 342sub write { 343 my $self = shift; 344 if ($self->{dirty} && $self->{write}) { 345 $self->_writeoptions($self->{options}); 346 undef $self->{dirty}; 347 $self->fh->flush; 348 } 349} 350 351sub _readoptions { 352 my $self = shift; 353 my $fh = $self->fh; 354 my $header; 355 $fh->seek(0,0); 356 return unless $fh->read($header,HEADER_LEN) == HEADER_LEN; 357 return $self->_parse_header($header); 358} 359 360sub _parse_header { 361 my $self = shift; 362 my $header = shift; 363 my ($seqid,$step,$min,$max,$span, 364 $mean,$stdev,$version,$start) = unpack(HEADER,$header); 365 return { seqid => $seqid, 366 step => $step, 367 span => $span, 368 min => $min, 369 max => $max, 370 mean => $mean, 371 stdev => $stdev, 372 version => $version, 373 start => $start, 374 }; 375} 376 377sub _generate_header { 378 my $self = shift; 379 my $options = shift; 380 return pack(HEADER,@{$options}{qw(seqid step min max span mean stdev version start)}); 381} 382 383sub _writeoptions { 384 my $self = shift; 385 my $options = shift; 386 my $fh = $self->fh; 387 my $header = $self->_generate_header($options); 388 $fh->seek(0,0); 389 $fh->print($header) or die "write failed: $!"; 390} 391 392sub _do_trim { 393 my $self = shift; 394 395 # don't trim if there is no score range 396 ($self->max - $self->min) or return; 397 398 my $trim = lc $self->trim; 399 my ($method,$arg); 400 if ($trim =~ /([a-z]+)(\d+)/) { 401 $method = "_trim_${1}"; 402 $arg = $2; 403 } 404 else { 405 $method = "_trim_${trim}"; 406 } 407 unless ($self->can($method)) { 408 carp "invalid trim method $trim"; 409 return; 410 } 411 412 $self->$method($arg); 413} 414 415# trim n standard deviations from the mean 416sub _trim_stdev { 417 my $self = shift; 418 my $factor = shift || 1; 419 my $mean = $self->mean; 420 my $stdev = $self->stdev * $factor; 421 my $min = $self->min > $mean - $stdev ? $self->min : $mean - $stdev; 422 my $max = $self->max < $mean + $stdev ? $self->max : $mean + $stdev; 423 warn "_trim_stdev (* $factor) : setting min to $min, max to $max (was ",$self->min,',',$self->max,')' 424 if DEBUG; 425 $self->min($min); 426 $self->max($max); 427} 428 429sub set_value { 430 my $self = shift; 431 croak "usage: \$wig->set_value(\$position => \$value)" 432 unless @_ == 2; 433 $self->value(@_); 434} 435 436sub set_range { 437 my $self = shift; 438 croak "usage: \$wig->set_range(\$start_position => \$end_position, \$value)" 439 unless @_ == 3; 440 $self->value(@_); 441} 442 443sub value { 444 my $self = shift; 445 my $position = shift; 446 447 my $offset = $self->_calculate_offset($position); 448 $offset >= HEADER_LEN or die "Tried to retrieve data from before start position"; 449 $self->seek($offset) or die "Seek failed: $!"; 450 451 if (@_ == 2) { 452 my $end = shift; 453 my $new_value = shift; 454 my $step = $self->step; 455 my $scaled_value = $self->scale($new_value); 456 $self->fh->print(pack('C*',($scaled_value)x(($end-$position+1)/$step))) or die "Write failed: $!"; 457 $self->{end} = $end if !exists $self->{end} || $self->{end} < $end; 458 } 459 460 elsif (@_==1) { 461 my $new_value = shift; 462 my $scaled_value = $self->scale($new_value); 463 $self->fh->print(pack('C*',$scaled_value)) or die "Write failed: $!"; 464 $self->{end} = $position if !exists $self->{end} || $self->{end} < $position; 465 return $new_value; 466 } 467 468 else { # retrieving data 469 my $buffer; 470 $self->fh->read($buffer,1) or die "Read failed: $!"; 471 my $scaled_value = unpack('C*',$buffer); 472 473 # missing data, so look back at most span values to get it 474 if ($scaled_value == 0 && (my $span = $self->span) > 1) { 475 $offset = $self->_calculate_offset($position-$span+1); 476 $offset >= HEADER_LEN or die "Tried to retrieve data from before start position"; 477 $self->seek($offset) or die "Seek failed: $!"; 478 479 $self->fh->read($buffer,$span/$self->step); 480 for (my $i=length($buffer)-2;$i>=0;$i--) { 481 my $val = substr($buffer,$i,1); 482 next if $val eq "\0"; 483 $scaled_value = unpack('C*',$val); 484 last; 485 } 486 487 } 488 return $self->unscale($scaled_value); 489 } 490} 491 492sub _calculate_offset { 493 my $self = shift; 494 my $position = shift; 495 my $step = $self->step; 496 my $start = $self->start; 497 return HEADER_LEN + int(($position-$start)/$step); 498} 499 500sub set_values { 501 my $self = shift; 502 croak "usage: \$wig->set_values(\$position => \@values)" 503 unless @_ == 2 and ref $_[1] eq 'ARRAY'; 504 $self->values(@_); 505} 506 507# read or write a series of values 508sub values { 509 my $self = shift; 510 my $start = shift; 511 if (ref $_[0] && ref $_[0] eq 'ARRAY') { 512 $self->_store_values($start,@_); 513 } else { 514 $self->_retrieve_values($start,@_); 515 } 516} 517 518sub export_to_wif64 { 519 my $self = shift; 520 my $data = $self->export_to_wif(@_); 521 eval "require MIME::Base64" 522 unless MIME::Base64->can('encode_base64'); 523 return MIME::Base64::encode_base64($data); 524} 525sub import_from_wif64 { 526 my $self = shift; 527 my $data = shift; 528 529 eval "require MIME::Base64" 530 unless MIME::Base64->can('decode_base64'); 531 return $self->import_from_wif(MIME::Base64::decode_base64($data)); 532} 533 534# subregion in "wiggle interchange format" (wif) 535sub export_to_wif { 536 my $self = shift; 537 my ($start,$end) = @_; 538 539 # get the 256 byte header 540 my $data = $self->_generate_header($self->{options}); 541 542 # add the range to the data (8 bytes overhead) 543 $data .= pack("L",$start); 544 $data .= pack("L",$end); 545 546 # add the packed data for this range 547 $data .= $self->_retrieve_packed_range($start,$end-$start+1,$self->step); 548 return $data; 549} 550 551sub export_to_bedgraph { 552 my $self = shift; 553 my ($start,$end,$fh) = @_; 554 my $max_range = 100_000; 555 556 $start ||= 1; 557 $end ||= $self->end; 558 559 my $lines; 560 for (my $s=$start;$s<$end;$s+=$max_range) { 561 my $e = $s + $max_range - 1; 562 $e = $end if $e > $end; 563 my $b = $self->values($s,$e); 564 $lines .= $self->_bedgraph_lines($s,$b,$fh); 565 } 566 567 return $lines; 568} 569 570sub _bedgraph_lines { 571 my $self = shift; 572 my ($start,$values,$fh) = @_; 573 my $seqid = $self->seqid; 574 my $result; 575 576 my ($last_val,$last_start,$end); 577 $last_start = $start-1; # 0 based indexing 578 for (my $i=0;$i<@$values;$i++) { 579 my $v = $values->[$i]; 580 581 if (!defined $v) { 582 if (defined $last_val) { 583 $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val); 584 undef $last_val; 585 } 586 $last_start = $start+$i; 587 next; 588 } 589 590 if (defined $last_val && $last_val != $v) { 591 $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val); 592 $last_start = $start+$i-1; 593 } 594 595 $last_val = $v; 596 $end = $start+$i-1; 597 } 598 $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$end+1,$last_val) if $last_val; 599 return $result; 600} 601 602sub _append_or_print_bedgraph { 603 my $self = shift; 604 my ($fh,$seqid,$start,$end,$val) = @_; 605 my $data = join("\t",$seqid,$start,$end,sprintf("%.2f",$val))."\n"; 606 if ($fh) { 607 print $fh $data; 608 return ''; 609 } else { 610 return $data; 611 } 612} 613 614sub import_from_wif { 615 my $self = shift; 616 my $wifdata = shift; 617 618 # BUG: should check that header is compatible 619 my $header = substr($wifdata,0,HEADER_LEN); 620 my $start = unpack('L',substr($wifdata,HEADER_LEN, 4)); 621 my $end = unpack('L',substr($wifdata,HEADER_LEN+4,4)); 622 623 my $options = $self->_parse_header($header); 624 my $stored_options = eval {$self->_readoptions} || {}; 625 my %merged_options = (%$stored_options,%$options); 626 $self->{options} = \%merged_options; 627 $self->{dirty}++; 628 629 # write the data 630 $self->seek($self->_calculate_offset($start)); 631 $self->fh->print(substr($wifdata,HEADER_LEN+8)) or die "write failed: $!"; 632 $self->{end} = $end if !defined $self->{end} or $self->{end} < $end; 633} 634 635sub _retrieve_values { 636 my $self = shift; 637 my ($start,$end,$samples) = @_; 638 639 my $data_start = $self->start; 640 my $step = $self->step; 641 my $span = $self->span; 642 643 croak "Value of start position ($start) is less than data start of $data_start" 644 unless $start >= $data_start; 645 croak "Value of end position ($end) is greater than data end of ",$self->end+$span, 646 unless $end <= $self->end + $span; 647 648 # generate list of positions to sample from 649 my $length = $end-$start+1; 650 $samples ||= $length; 651 652# warn "samples = $samples, length=$length, span=$span, step=$step"; 653 654 # if the length is grossly greater than the samples, then we won't even 655 # bother fetching all the data, but just sample into the disk file 656 if ($length/$samples > 100 && $step == 1) { 657 my @result; 658# my $window = 20*($span/$step); 659 my $interval = $length/$samples; 660# my $window = 100*$interval/$span; 661 my $window = $interval/2; 662# warn "window = $window, interval = $interval"; 663 for (my $i=0;$i<$samples;$i++) { 664 my $packed_data = $self->_retrieve_packed_range(int($start+$i*$interval-$window), 665 int($window), 666 $step); 667 my @bases= grep {$_} unpack('C*',$packed_data); 668 if (@bases) { 669 local $^W = 0; 670 my $arry = $self->unscale(\@bases); 671 my $n = @$arry; 672 my $total = 0; 673 $total += $_ foreach @$arry; 674 my $mean = $total/$n; 675 my $max; 676 for (@$arry) { $max = $_ if !defined $max || $max < $_ } 677# warn $start+$i*$interval,': ',join(',',map {int($_)} @$arry), 678# " mean = $mean, max = $max"; 679# push @result,$mean; 680 push @result,$max; 681 } else { 682 push @result,0; 683 } 684 } 685 return \@result; 686 } 687 688 my $packed_data = $self->_retrieve_packed_range($start,$length,$step); 689 690 my @bases; 691 $#bases = $length-1; 692 693 if ($step == $span) { 694 # in this case, we do not have any partially-empty 695 # steps, so can operate on the step-length data structure 696 # directly 697 @bases = unpack('C*',$packed_data); 698 } 699 700 else { 701 # In this case some regions may have partially missing data, 702 # so we create an array equal to the length of the requested region, 703 # fill it in, and then sample it 704 for (my $i=0; $i<length $packed_data; $i++) { 705 my $index = $i * $step; 706 my $value = unpack('C',substr($packed_data,$i,1)); 707 next unless $value; # ignore 0 values 708 @bases[$index..$index+$span-1] = ($value) x $span; 709 } 710 $#bases = $length-1; 711 } 712 713 my $r = $self->unscale(\@bases); 714 $r = $self->sample($r,$samples); 715 $r = $self->smooth($r,$self->window * $samples/@bases) 716 if defined $self->window && $self->window>1; 717 return $r; 718} 719 720sub _retrieve_packed_range { 721 my $self = shift; 722 my ($start,$length,$step) = @_; 723 my $span = $self->span; 724 725 my $offset = $self->_calculate_offset($start); 726 727 $self->seek($offset); 728 my $packed_data; 729 $self->fh->read($packed_data,$length/$step); 730 731 # pad data up to required amount 732 $packed_data .= "\0" x ($length/$step-length($packed_data)) 733 if length $packed_data < $length/$step; 734 return $packed_data; 735} 736 737 738sub sample { 739 my $self = shift; 740 my ($values,$samples) = @_; 741 my $length = @$values; 742 my $window_size = $length/$samples; 743 744 my @samples; 745 $#samples = $samples-1; 746 747 if ($window_size < 2) { # no data smoothing needed 748 @samples = map { $values->[$_*$window_size] } (0..$samples-1); 749 } 750 else { 751 my $smoothsub = $self->smoothsub; 752 for (my $i=0; $i<$samples; $i++) { 753 my $start = $i * $window_size; 754 my $end = $start + $window_size - 1; 755 my @window = @{$values}[$start..$end]; 756 757 my $value = $smoothsub->(\@window); 758 $samples[$i] = $value; 759 } 760 } 761 762 return \@samples; 763} 764 765sub smoothsub { 766 my $self = shift; 767 768 my $smoothing = $self->smoothing; 769 my $smoothsub = $smoothing eq 'mean' ? \&sample_mean 770 :$smoothing eq 'max' ? \&sample_max 771 :$smoothing eq 'min' ? \&sample_min 772 :$smoothing eq 'none' ? \&sample_center 773 :croak("invalid smoothing type '$smoothing'"); 774 return $smoothsub; 775} 776 777sub smooth { 778 my ($self,$data,$window) = @_; 779 780 my $smoothing = $self->smoothing; 781 $window ||= $self->window; 782 783 return $data if $smoothing eq 'none' || $window < 2; 784 785 my @data = @$data; 786 my $smoother = $self->smoothsub; 787 $window++ unless $window % 2; 788 my $offset = int($window/2); 789 790 for (my $i=$offset; $i<@$data-$offset; $i++) { 791 my $start = $i - $offset; 792 my $end = $i + $offset; 793 my @subset = @data[$start..$end]; 794 $data->[$i] = $smoother->(\@subset); 795 } 796 797 return $data; 798} 799 800sub window { 801 my $self = shift; 802 my $d = $self->{window}; 803 $self->{window} = shift if @_; 804 $d; 805} 806 807sub sample_mean { 808 my $values = shift; 809 my ($total,$items); 810 for my $v (@$values) { 811 next unless defined $v; 812 $items++; 813 $total+=$v; 814 } 815 return $items ? $total/$items : undef; 816} 817 818sub sample_max { 819 my $values = shift; 820 my $max; 821 for my $v (@$values) { 822 next unless defined $v; 823 $max = $v if !defined $max or $max < $v; 824 } 825 return $max; 826} 827 828sub sample_min { 829 my $values = shift; 830 my $min; 831 for my $v (@$values) { 832 next unless defined $v; 833 $min = $v if !defined $min or $min > $v; 834 } 835 return $min; 836} 837 838sub sample_center { 839 my $values = shift; 840 return $values->[@$values/2]; 841} 842 843sub _store_values { 844 my $self = shift; 845 my ($position,$data) = @_; 846 847 # where does data start 848 my $offset = $self->_calculate_offset($position); 849 my $fh = $self->fh; 850 my $step = $self->step; 851 852 my $scaled = $self->scale($data); 853 854 $self->seek($offset); 855 my $packed_data = pack('C*',@$scaled); 856 $fh->print($packed_data) or die "Write failed: $!"; 857 858 my $new_end = $position+@$data-1; 859 $self->{end} = $new_end if !exists $self->{end} || $self->{end} < $new_end; 860} 861 862# zero means "no data" 863# everything else is scaled from 1-255 864sub scale { 865 my $self = shift; 866 my $values = shift; 867 my $scale = $self->_get_scale; 868 my $min = $self->{options}{min}; 869 if (ref $values && ref $values eq 'ARRAY') { 870 my @return = map { 871 my $i = ($_ - $min)/$scale; 872 my $v = 1 + int($i+0.5*($i<=>0)); # avoid call to round() 873 $v = 1 if $v < 1; 874 $v = 255 if $v > 255; 875 $v; 876 } @$values; 877 return \@return; 878 } else { 879 my $v = 1 + round (($values - $min)/$scale); 880 $v = 1 if $v < 1; 881 $v = 255 if $v > 255; 882 return $v; 883 } 884} 885 886sub unscale { 887 my $self = shift; 888 my $values = shift; 889 my $scale = $self->_get_scale; 890 my $min = $self->{options}{min}; 891 892 if (ref $values && ref $values eq 'ARRAY') { 893 my @return = map {$_ ? (($_-1) * $scale + $min) : undef} @$values; 894 return \@return; 895 } else { 896 return $values ? ($values-1) * $scale + $min : undef; 897 } 898} 899 900sub _get_scale { 901 my $self = shift; 902 unless ($self->{scale}) { 903 my $min = $self->{options}{min}; 904 my $max = $self->{options}{max}; 905 my $range = $max - $min || 0.001; # can't be zero! 906 $self->{scale} = $range/254; 907 } 908 return $self->{scale}; 909} 910 911sub round { 912 return int($_[0]+0.5*($_[0]<=>0)); 913} 914 915 9161; 917 918__END__ 919 920=head1 SEE ALSO 921 922L<Bio::Graphics::Wiggle::Loader>, 923L<Bio::Graphics::Panel>, 924L<Bio::Graphics::Glyph>, 925L<Bio::Graphics::Feature>, 926L<Bio::Graphics::FeatureFile> 927 928=head1 AUTHOR 929 930Lincoln Stein E<lt>lstein@cshl.orgE<gt>. 931 932Copyright (c) 2007 Cold Spring Harbor Laboratory 933 934This package and its accompanying libraries is free software; you can 935redistribute it and/or modify it under the terms of the GPL (either 936version 1, or at your option, any later version) or the Artistic 937License 2.0. Refer to LICENSE for the full license text. In addition, 938please see DISCLAIMER.txt for disclaimers of warranty. 939 940=cut 941