1package Bio::Graphics::Wiggle;
2
3=head1 NAME
4
5Bio::Graphics::Wiggle -- Binary storage for dense genomic features
6
7=head1 SYNOPSIS
8
9 # all positions are 1-based
10
11 my $wig = Bio::Graphics::Wiggle->new('./test.wig',
12                                      $writeable,
13                                     { seqid => $seqid,
14                                       start => $start,
15                                       step  => $step,
16                                       min   => $min,
17                                       max   => $max });
18
19 $wig->erase;
20
21 my $seqid = $wig->seqid('new_id');
22 my $max   = $wig->max($new_max);
23 my $min   = $wig->min($new_min);
24 my $step  = $wig->step($new_step);   # data stored at modulus step == 0; all else is blank
25
26 $wig->set_value($position  => $value);   # store $value at position
27 $wig->set_values($position => \@values); # store array of values at position
28 $wig->set_range($start=>$end,$value);    # store the same $value from $start to $end
29
30 my $value  = $wig->value($position);     # fetch value from position
31 my $values = $wig->values($start,$end);  # fetch range of data from $start to $end
32
33 $wig->window(100);                       # sample window size
34 $wig->smoothing('mean');                 # when sampling, compute the mean value across sample window
35 my $values = $wig->values($start,$end,$samples);  # fetch $samples data points from $start to $end
36
37
38=head1 DESCRIPTION
39
40IMPORTANT NOTE: This implementation is still not right. See
41http://genomewiki.ucsc.edu/index.php/Wiggle for a more space-efficient
42implementation.
43
44This module stores "wiggle" style quantitative genome data for display
45in a genome browser application. The data for each chromosome (or
46contig, or other reference sequence) is stored in a single file in the
47following format:
48
49  256 byte header
50      50 bytes seqid, zero-terminated C string
51      4  byte long integer, value of "step" (explained later)
52      4  byte perl native float, the "min" value
53      4  byte perl native float, the "max" value
54      4  byte long integer, value of "span"
55      4  byte perl native float, the mean
56      4  byte perl native float, the standard deviation
57      2  byte unsigned short, the version number (currently version 0)
58      4  byte long integer, sequence start position (in 0-based coordinates)
59      null padding to 256 bytes for future use
60
61The remainder of the file consists of 8-bit unsigned scaled integer
62values. This means that all quantitative data will be scaled to 8-bit
63precision!
64
65For a convenient method of creating Wiggle files from UCSC-type WIG
66input and creating GFF3 output, please see
67L<Bio::Graphics::Wiggle::Loader>.
68
69=head1 METHODS
70
71=head2 Constructor and Accessors
72
73=over 4
74
75=item $wig = Bio::Graphics::Wiggle->new($filename,$writeable,{options})
76
77Open/create a wiggle-format data file:
78
79  $filename  -- path to the file to open/create
80  $writeable -- boolean value indicating whether file is
81                writeable. Missing files will only be created
82                if $writeable set to a true value. If path is
83                empty (undef or empty string) and writeable is true,
84                new() will create a temporary file that will be
85                deleted when the object goes out of scope.
86  {options}  -- hash ref of the following named options, only valid
87                when creating a new wig file with $writeable true.
88
89        option name    description                  default
90        -----------    -----                        -------
91          seqid        name/id of sequence          empty name
92          min          minimum value of data points 0
93          max          maximum value of data points 255
94          step         interval between data points 1
95          span         width of data points         value of "step"
96
97The "step" can be used to create sparse files to save space. By
98default, step is set to 1, in which case a data value will be stored
99at each base of the sequence. By setting step to 10, then each value
100is taken to correspond to 10 bp, and the file will be 10x smaller.
101For example, consider this step 5 data set:
102
103    1  2  3  4  5  6  7  8  9 10 11 12 13 14
104   20  .  .  .  . 60  .  .  .  . 80  .  .  .
105
106We have stored the values "20" "60" and "80" at positions 1, 6 and 11,
107respectively. When retrieving this data, it will appear as if
108positions 1 through 5 have a value of 20, positions 6-10 have a value
109of 60, and positions 11-14 have a value of 80. In the data file, we
110store, positions 1,6,and 11 in adjacent bytes.
111
112Note that no locking is performed by this module. If you wish to allow
113multi-user write access to the databases files, you will need to
114flock() the files yourself.
115
116=item $seqid = $wig->seqid(['new_id'])
117
118=item $max   = $wig->max([$new_max])
119
120=item $min   = $wig->min([$new_min])
121
122=item $step  = $wig->step([$new_step])
123
124=item $span  = $wig->span([$new_span])
125
126=item $mean  = $wig->mean([$new_mean]);
127
128=item $stdev = $wig->stdev([$new_stdev]);
129
130These accessors get or set the corresponding values. Setting is only
131allowed if the file was opened for writing. Note that changing the
132min, max and step after writing data to the file under another
133parameter set will produce unexpected (and invalid) results, as the
134existing data is not automatically updated to be consistent.
135
136=item $trim  = $wig->trim([$new_trim]);
137
138The trim method sets the trimming method, which can be used to trim
139out extreme values. Three methods are currently supported:
140
141  none    No trimming
142  stdev   Trim 1 standard deviation above and below mean
143  stdevN  Trim N standard deviations above and below the mean
144
145In "stdevN", any can be any positive integer.
146
147=back
148
149=head2 Setting Data
150
151=over 4
152
153=item $wig->set_value($position => $value)
154
155This method sets the value at $position to $value. If a step>1 is in
156force, then $position will be rounded down to the nearest multiple of
157step.
158
159=item $wig->set_range($start=>$end, $value)
160
161This method sets the value of all bases between $start and $end to
162$value, honoring step.
163
164=item $sig->set_values($position => \@values)
165
166This method writes an array of values into the datababase beginning at
167$position (or the nearest lower multiple of step). If step>1, then
168values will be written at step intervals.
169
170=back
171
172=head2 Retrieving Data
173
174=over 4
175
176=item $value = $wig->value($position)
177
178Retrieve the single data item at position $position, or the nearest
179lower multiple of $step if step>1.
180
181=item $values = $wig->values($start=>$end)
182
183Retrieve the values in the range $start to $end and return them as an
184array ref. Note that you will always get an array of size
185($end-$start+1) even if step>1; the data in between the step intervals
186will be filled in.
187
188=item $values = $wig->values($start=>$end,$samples)
189
190Retrieve a sampling of the values between $start and $end. Nothing
191very sophisticated is done here; the code simply returns the number of
192values indicated in $samples, smoothed according to the smoothing
193method selected (default to "mean"), then selected at even intervals
194from the range $start to $end. The return value is an arrayref of
195exactly $samples values.
196
197=item $string = $wig->export_to_wif($start,$end)
198
199=item $string = $wig->export_to_wif64($start,$end)
200
201Export the region from start to end in the "wif" format. This data can
202later be imported into another Bio::Graphics::Wiggle object. The first
203version returns a binary string. The second version returns a base64
204encoded version that is safe for ascii-oriented formata such as GFF3
205and XML.
206
207=item $wig->import_from_wif($string)
208
209=item $wig->import_from_wif64($string)
210
211Import a wif format data string into the Bio::Graphics::Wiggle
212object. The first version expects a binary string. The second version
213expects a base64 encoded version that is safe for ascii-oriented
214formata such as GFF3 and XML.
215
216=back
217
218
219=cut
220
221# read/write genome tiling data, to be compatible with Jim Kent's WIG format
222use strict;
223use warnings;
224use IO::File;
225use Carp 'croak','carp','confess';
226
227use constant HEADER_LEN => 256;
228    # seqid, step, min, max, span, mean, stdev, version, start
229use constant HEADER => '(Z50LFFLFFSL)@'.HEADER_LEN;
230use constant BODY   => 'C';
231use constant DEBUG  => 0;
232use constant DEFAULT_SMOOTHING => 'mean';
233use constant VERSION => 0;
234our $VERSION = '1.0';
235
236sub new {
237  my $class          = shift;
238  my ($path,$write,$options) = @_;
239  $path ||= ''; # to avoid uninit warning
240  my $mode = $write ? -e $path   # if file already exists...
241                         ? '+<'    # ...open for read/write
242                         : '+>'    # ...else clobber and open a new one
243                    : '<';       # read only
244  my $fh = $class->new_fh($path,$mode);
245  $fh or die (($path||'temporary file').": $!");
246
247  $options ||= {};
248
249  my $self = bless {fh      => $fh,
250		    write   => $write,
251		    dirty   => scalar keys %$options
252		   }, ref $class || $class;
253
254  my $stored_options = eval {$self->_readoptions} || {};
255  $options->{start}-- if defined $options->{start};  # 1-based ==> 0-based coordinates
256  my %merged_options = (%$stored_options,%$options);
257  # warn "merged options = ",join ' ',%merged_options;
258  $merged_options{version}||= 0;
259  $merged_options{seqid}  ||= 'chrUnknown';
260  $merged_options{min}    ||= 0;
261  $merged_options{max}    ||= 255;
262  $merged_options{mean}   ||= 128;
263  $merged_options{stdev}  ||= 255;
264  $merged_options{trim}   ||= 'none';
265  $merged_options{step}   ||= 1;
266  $merged_options{start}  ||= 0;
267  $merged_options{span}   ||= $merged_options{step};
268  $self->{options}         = \%merged_options;
269  $self->_do_trim        unless $self->trim eq 'none';
270  return $self;
271}
272
273sub new_fh {
274    my $self = shift;
275    my ($path,$mode) = @_;
276    return $path ? IO::File->new($path,$mode)
277                 : IO::File->new_tmpfile;
278}
279
280sub end {
281  my $self = shift;
282  unless (defined $self->{end}) {
283      my $size     = (stat($self->fh))[7];
284      my $data_len = $size - HEADER_LEN();
285      return unless $data_len>0;   # undef end
286      $self->{end} = ($self->start-1) + $data_len * $self->step;
287  }
288  return $self->{end};
289}
290
291sub DESTROY { shift->write }
292
293sub erase {
294  my $self = shift;
295  $self->fh->truncate(HEADER_LEN);
296}
297
298sub fh     { shift->{fh}    }
299sub seek   { shift->fh->seek(shift,0) }
300sub tell   { shift->fh->tell()        }
301
302sub _option {
303  my $self   = shift;
304  my $option = shift;
305  my $d      = $self->{options}{$option};
306  if (@_) {
307    $self->{dirty}++;
308    $self->{options}{$option} = shift;
309    delete $self->{scale} if $option eq 'min' or $option eq 'max';
310  }
311  return $d;
312}
313
314sub version  { shift->_option('version',@_)  }
315sub seqid    { shift->_option('seqid',@_) }
316sub min      { shift->_option('min',@_) }
317sub max      { shift->_option('max',@_) }
318sub step     { shift->_option('step',@_) }
319sub span     { shift->_option('span',@_) }
320sub mean     { shift->_option('mean',@_) }
321sub stdev    { shift->_option('stdev',@_) }
322sub trim     { shift->_option('trim',@_)  }
323
324sub start    {  # slightly different because we have to deal with 1 vs 0-based coordinates
325    my $self  = shift;
326    my $start = $self->_option('start');
327    $start++;   # convert into 1-based coordinates
328    if (@_) {
329	my $newstart = shift;
330	$self->_option('start',$newstart-1); # store in zero-based coordinates
331    }
332    return $start;
333}
334
335sub smoothing {
336  my $self = shift;
337  my $d    = $self->{smoothing} || DEFAULT_SMOOTHING;
338  $self->{smoothing} = shift if @_;
339  $d;
340}
341
342sub write {
343  my $self = shift;
344  if ($self->{dirty} && $self->{write}) {
345    $self->_writeoptions($self->{options});
346    undef $self->{dirty};
347    $self->fh->flush;
348  }
349}
350
351sub _readoptions {
352  my $self = shift;
353  my $fh = $self->fh;
354  my $header;
355  $fh->seek(0,0);
356  return unless  $fh->read($header,HEADER_LEN) == HEADER_LEN;
357  return $self->_parse_header($header);
358}
359
360sub _parse_header {
361    my $self   = shift;
362    my $header = shift;
363    my ($seqid,$step,$min,$max,$span,
364	$mean,$stdev,$version,$start) = unpack(HEADER,$header);
365    return { seqid => $seqid,
366	     step  => $step,
367	     span  => $span,
368	     min   => $min,
369	     max   => $max,
370	     mean  => $mean,
371	     stdev => $stdev,
372	     version => $version,
373	     start => $start,
374    };
375}
376
377sub _generate_header {
378    my $self    = shift;
379    my $options = shift;
380    return pack(HEADER,@{$options}{qw(seqid step min max span mean stdev version start)});
381}
382
383sub _writeoptions {
384  my $self    = shift;
385  my $options = shift;
386  my $fh = $self->fh;
387  my $header = $self->_generate_header($options);
388  $fh->seek(0,0);
389  $fh->print($header) or die "write failed: $!";
390}
391
392sub _do_trim {
393    my $self = shift;
394
395    # don't trim if there is no score range
396    ($self->max - $self->min) or return;
397
398    my $trim = lc $self->trim;
399    my ($method,$arg);
400    if ($trim =~ /([a-z]+)(\d+)/) {
401      $method = "_trim_${1}";
402      $arg    = $2;
403    }
404    else {
405      $method = "_trim_${trim}";
406    }
407    unless ($self->can($method)) {
408	carp "invalid trim method $trim";
409	return;
410    }
411
412    $self->$method($arg);
413}
414
415# trim n standard deviations from the mean
416sub _trim_stdev {
417  my $self   = shift;
418  my $factor = shift || 1;
419  my $mean   = $self->mean;
420  my $stdev  = $self->stdev * $factor;
421  my $min    = $self->min > $mean - $stdev ? $self->min : $mean - $stdev;
422  my $max    = $self->max < $mean + $stdev ? $self->max : $mean + $stdev;
423  warn "_trim_stdev (* $factor) : setting min to $min, max to $max (was ",$self->min,',',$self->max,')'
424      if DEBUG;
425  $self->min($min);
426  $self->max($max);
427}
428
429sub set_value {
430  my $self = shift;
431  croak "usage: \$wig->set_value(\$position => \$value)"
432    unless @_ == 2;
433  $self->value(@_);
434}
435
436sub set_range {
437  my $self = shift;
438  croak "usage: \$wig->set_range(\$start_position => \$end_position, \$value)"
439    unless @_ == 3;
440  $self->value(@_);
441}
442
443sub value {
444  my $self     = shift;
445  my $position = shift;
446
447  my $offset   = $self->_calculate_offset($position);
448  $offset     >= HEADER_LEN or die "Tried to retrieve data from before start position";
449  $self->seek($offset)      or die "Seek failed: $!";
450
451  if (@_ == 2) {
452    my $end       = shift;
453    my $new_value = shift;
454    my $step      = $self->step;
455    my $scaled_value  = $self->scale($new_value);
456    $self->fh->print(pack('C*',($scaled_value)x(($end-$position+1)/$step))) or die "Write failed: $!";
457    $self->{end} = $end if !exists $self->{end} || $self->{end} < $end;
458  }
459
460  elsif (@_==1) {
461    my $new_value     = shift;
462    my $scaled_value  = $self->scale($new_value);
463    $self->fh->print(pack('C*',$scaled_value)) or die "Write failed: $!";
464    $self->{end} = $position if !exists $self->{end} || $self->{end} < $position;
465    return $new_value;
466  }
467
468  else { # retrieving data
469    my $buffer;
470    $self->fh->read($buffer,1) or die "Read failed: $!";
471    my $scaled_value = unpack('C*',$buffer);
472
473    # missing data, so look back at most span values to get it
474    if ($scaled_value == 0 && (my $span = $self->span) > 1) {
475      $offset  = $self->_calculate_offset($position-$span+1);
476      $offset >= HEADER_LEN or die "Tried to retrieve data from before start position";
477      $self->seek($offset) or die "Seek failed: $!";
478
479      $self->fh->read($buffer,$span/$self->step);
480      for (my $i=length($buffer)-2;$i>=0;$i--) {
481	my $val = substr($buffer,$i,1);
482	next if $val eq "\0";
483	$scaled_value = unpack('C*',$val);
484	last;
485      }
486
487    }
488    return $self->unscale($scaled_value);
489  }
490}
491
492sub _calculate_offset {
493  my $self     = shift;
494  my $position = shift;
495  my $step     = $self->step;
496  my $start    = $self->start;
497  return HEADER_LEN + int(($position-$start)/$step);
498}
499
500sub set_values {
501  my $self = shift;
502  croak "usage: \$wig->set_values(\$position => \@values)"
503    unless @_ == 2 and ref $_[1] eq 'ARRAY';
504  $self->values(@_);
505}
506
507# read or write a series of values
508sub values {
509  my $self  = shift;
510  my $start = shift;
511  if (ref $_[0] && ref $_[0] eq 'ARRAY') {
512    $self->_store_values($start,@_);
513  } else {
514    $self->_retrieve_values($start,@_);
515  }
516}
517
518sub export_to_wif64 {
519    my $self = shift;
520    my $data = $self->export_to_wif(@_);
521    eval "require MIME::Base64"
522	unless MIME::Base64->can('encode_base64');
523    return MIME::Base64::encode_base64($data);
524}
525sub import_from_wif64 {
526    my $self = shift;
527    my $data = shift;
528
529    eval "require MIME::Base64"
530	unless MIME::Base64->can('decode_base64');
531    return $self->import_from_wif(MIME::Base64::decode_base64($data));
532}
533
534# subregion in "wiggle interchange format" (wif)
535sub export_to_wif {
536    my $self = shift;
537    my ($start,$end) = @_;
538
539    # get the 256 byte header
540    my $data = $self->_generate_header($self->{options});
541
542    # add the range to the data (8 bytes overhead)
543    $data   .= pack("L",$start);
544    $data   .= pack("L",$end);
545
546    # add the packed data for this range
547    $data   .= $self->_retrieve_packed_range($start,$end-$start+1,$self->step);
548    return $data;
549}
550
551sub export_to_bedgraph {
552    my $self = shift;
553    my ($start,$end,$fh) = @_;
554    my $max_range = 100_000;
555
556    $start ||= 1;
557    $end   ||= $self->end;
558
559    my $lines;
560    for (my $s=$start;$s<$end;$s+=$max_range) {
561	my $e      = $s + $max_range - 1;
562	$e         = $end if $e > $end;
563	my $b      = $self->values($s,$e);
564	$lines .= $self->_bedgraph_lines($s,$b,$fh);
565    }
566
567    return $lines;
568}
569
570sub _bedgraph_lines {
571    my $self = shift;
572    my ($start,$values,$fh) = @_;
573    my $seqid        = $self->seqid;
574    my $result;
575
576    my ($last_val,$last_start,$end);
577    $last_start = $start-1; # 0 based indexing
578    for (my $i=0;$i<@$values;$i++) {
579	my $v           = $values->[$i];
580
581	if (!defined $v) {
582	    if (defined $last_val) {
583		$result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val);
584		undef $last_val;
585	    }
586	    $last_start = $start+$i;
587	    next;
588	}
589
590	if (defined $last_val && $last_val != $v) {
591	    $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val);
592	    $last_start = $start+$i-1;
593	}
594
595	$last_val = $v;
596	$end      = $start+$i-1;
597    }
598    $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$end+1,$last_val) if $last_val;
599    return $result;
600}
601
602sub _append_or_print_bedgraph {
603    my $self = shift;
604    my ($fh,$seqid,$start,$end,$val) = @_;
605    my $data = join("\t",$seqid,$start,$end,sprintf("%.2f",$val))."\n";
606    if ($fh) {
607	print $fh $data;
608	return '';
609    } else {
610	return $data;
611    }
612}
613
614sub import_from_wif {
615    my $self    = shift;
616    my $wifdata = shift;
617
618    # BUG: should check that header is compatible
619    my $header  = substr($wifdata,0,HEADER_LEN);
620    my $start   = unpack('L',substr($wifdata,HEADER_LEN,  4));
621    my $end     = unpack('L',substr($wifdata,HEADER_LEN+4,4));
622
623    my $options = $self->_parse_header($header);
624    my $stored_options = eval {$self->_readoptions} || {};
625    my %merged_options = (%$stored_options,%$options);
626    $self->{options}   = \%merged_options;
627    $self->{dirty}++;
628
629    # write the data
630    $self->seek($self->_calculate_offset($start));
631    $self->fh->print(substr($wifdata,HEADER_LEN+8)) or die "write failed: $!";
632    $self->{end} = $end if !defined $self->{end} or $self->{end} < $end;
633}
634
635sub _retrieve_values {
636  my $self = shift;
637  my ($start,$end,$samples) = @_;
638
639  my $data_start = $self->start;
640  my $step       = $self->step;
641  my $span       = $self->span;
642
643  croak "Value of start position ($start) is less than data start of $data_start"
644      unless $start >= $data_start;
645  croak "Value of end position ($end) is greater than data end of ",$self->end+$span,
646      unless $end <= $self->end + $span;
647
648  # generate list of positions to sample from
649  my $length = $end-$start+1;
650  $samples ||= $length;
651
652#  warn "samples = $samples, length=$length, span=$span, step=$step";
653
654  # if the length is grossly greater than the samples, then we won't even
655  # bother fetching all the data, but just sample into the disk file
656  if ($length/$samples > 100 && $step == 1) {
657      my @result;
658#      my $window   = 20*($span/$step);
659      my $interval = $length/$samples;
660#      my $window   = 100*$interval/$span;
661      my $window    = $interval/2;
662#      warn "window = $window, interval = $interval";
663      for (my $i=0;$i<$samples;$i++) {
664	  my $packed_data = $self->_retrieve_packed_range(int($start+$i*$interval-$window),
665							  int($window),
666							  $step);
667	  my @bases= grep {$_} unpack('C*',$packed_data);
668	  if (@bases) {
669	      local $^W = 0;
670	      my $arry = $self->unscale(\@bases);
671	      my $n    = @$arry;
672	      my $total = 0;
673	      $total   += $_ foreach @$arry;
674	      my $mean = $total/$n;
675	      my $max;
676	      for (@$arry) { $max = $_ if !defined $max || $max < $_ }
677#	      warn $start+$i*$interval,': ',join(',',map {int($_)} @$arry),
678#	      " mean = $mean, max = $max";
679#	      push @result,$mean;
680	      push @result,$max;
681	  } else {
682	      push @result,0;
683	  }
684      }
685      return \@result;
686  }
687
688  my $packed_data = $self->_retrieve_packed_range($start,$length,$step);
689
690  my @bases;
691  $#bases = $length-1;
692
693  if ($step == $span) {
694    # in this case, we do not have any partially-empty
695    # steps, so can operate on the step-length data structure
696    # directly
697    @bases = unpack('C*',$packed_data);
698  }
699
700  else {
701    # In this case some regions may have partially missing data,
702    # so we create an array equal to the length of the requested region,
703    # fill it in, and then sample it
704    for (my $i=0; $i<length $packed_data; $i++) {
705      my $index = $i * $step;
706      my $value = unpack('C',substr($packed_data,$i,1));
707      next unless $value;  # ignore 0 values
708      @bases[$index..$index+$span-1] = ($value) x $span;
709    }
710    $#bases = $length-1;
711  }
712
713  my $r = $self->unscale(\@bases);
714  $r    = $self->sample($r,$samples);
715  $r    = $self->smooth($r,$self->window * $samples/@bases)
716      if defined $self->window && $self->window>1;
717  return $r;
718}
719
720sub _retrieve_packed_range {
721  my $self = shift;
722  my ($start,$length,$step) = @_;
723  my $span = $self->span;
724
725  my $offset            = $self->_calculate_offset($start);
726
727  $self->seek($offset);
728  my $packed_data;
729  $self->fh->read($packed_data,$length/$step);
730
731  # pad data up to required amount
732  $packed_data .= "\0" x ($length/$step-length($packed_data))
733    if length $packed_data < $length/$step;
734  return $packed_data;
735}
736
737
738sub sample {
739  my $self = shift;
740  my ($values,$samples) = @_;
741  my $length = @$values;
742  my $window_size = $length/$samples;
743
744  my @samples;
745  $#samples = $samples-1;
746
747  if ($window_size < 2) { # no data smoothing needed
748    @samples = map { $values->[$_*$window_size] } (0..$samples-1);
749  }
750  else {
751    my $smoothsub = $self->smoothsub;
752    for (my $i=0; $i<$samples; $i++) {
753      my $start    = $i * $window_size;
754      my $end      = $start + $window_size - 1;
755      my @window   = @{$values}[$start..$end];
756
757      my $value    =  $smoothsub->(\@window);
758      $samples[$i] = $value;
759    }
760  }
761
762  return \@samples;
763}
764
765sub smoothsub {
766  my $self = shift;
767
768  my $smoothing = $self->smoothing;
769  my $smoothsub   = $smoothing eq 'mean' ? \&sample_mean
770                   :$smoothing eq 'max'  ? \&sample_max
771                   :$smoothing eq 'min'  ? \&sample_min
772                   :$smoothing eq 'none' ? \&sample_center
773                   :croak("invalid smoothing type '$smoothing'");
774  return $smoothsub;
775}
776
777sub smooth {
778  my ($self,$data,$window) = @_;
779
780  my $smoothing = $self->smoothing;
781  $window  ||= $self->window;
782
783  return $data if $smoothing eq 'none' || $window < 2;
784
785  my @data = @$data;
786  my $smoother = $self->smoothsub;
787  $window++ unless $window % 2;
788  my $offset = int($window/2);
789
790  for (my $i=$offset; $i<@$data-$offset; $i++) {
791    my $start = $i - $offset;
792    my $end   = $i + $offset;
793    my @subset = @data[$start..$end];
794    $data->[$i] = $smoother->(\@subset);
795  }
796
797  return $data;
798}
799
800sub window {
801  my $self = shift;
802  my $d    = $self->{window};
803  $self->{window} = shift if @_;
804  $d;
805}
806
807sub sample_mean {
808  my $values = shift;
809  my ($total,$items);
810  for my $v (@$values) {
811    next unless defined $v;
812    $items++;
813    $total+=$v;
814  }
815  return $items ? $total/$items : undef;
816}
817
818sub sample_max {
819  my $values = shift;
820  my $max;
821  for my $v (@$values) {
822    next unless defined $v;
823    $max = $v if !defined $max or $max < $v;
824  }
825  return $max;
826}
827
828sub sample_min {
829  my $values = shift;
830  my $min;
831  for my $v (@$values) {
832    next unless defined $v;
833    $min = $v if !defined $min or $min > $v;
834  }
835  return $min;
836}
837
838sub sample_center {
839    my $values = shift;
840    return $values->[@$values/2];
841}
842
843sub _store_values {
844  my $self = shift;
845  my ($position,$data) = @_;
846
847  # where does data start
848  my $offset = $self->_calculate_offset($position);
849  my $fh     = $self->fh;
850  my $step   = $self->step;
851
852  my $scaled = $self->scale($data);
853
854  $self->seek($offset);
855  my $packed_data = pack('C*',@$scaled);
856  $fh->print($packed_data) or die "Write failed: $!";
857
858  my $new_end  = $position+@$data-1;
859  $self->{end} = $new_end if !exists $self->{end} || $self->{end} < $new_end;
860}
861
862# zero means "no data"
863# everything else is scaled from 1-255
864sub scale {
865  my $self           = shift;
866  my $values         = shift;
867  my $scale = $self->_get_scale;
868  my $min   = $self->{options}{min};
869  if (ref $values && ref $values eq 'ARRAY') {
870    my @return = map {
871      my $i = ($_ - $min)/$scale;
872      my $v = 1 + int($i+0.5*($i<=>0));  # avoid call to round()
873      $v = 1   if $v < 1;
874      $v = 255 if $v > 255;
875      $v;
876    } @$values;
877    return \@return;
878  } else {
879    my $v = 1 + round (($values - $min)/$scale);
880    $v = 1   if $v < 1;
881    $v = 255 if $v > 255;
882    return $v;
883  }
884}
885
886sub unscale {
887  my $self         = shift;
888  my $values       = shift;
889  my $scale = $self->_get_scale;
890  my $min   = $self->{options}{min};
891
892  if (ref $values && ref $values eq 'ARRAY') {
893    my @return = map {$_ ? (($_-1) * $scale + $min) : undef} @$values;
894    return \@return;
895  } else {
896    return $values ? ($values-1) * $scale + $min : undef;
897  }
898}
899
900sub _get_scale {
901  my $self = shift;
902  unless ($self->{scale}) {
903    my $min  = $self->{options}{min};
904    my $max  = $self->{options}{max};
905    my $range = $max - $min || 0.001; # can't be zero!
906    $self->{scale} = $range/254;
907  }
908  return $self->{scale};
909}
910
911sub round {
912  return int($_[0]+0.5*($_[0]<=>0));
913}
914
915
9161;
917
918__END__
919
920=head1 SEE ALSO
921
922L<Bio::Graphics::Wiggle::Loader>,
923L<Bio::Graphics::Panel>,
924L<Bio::Graphics::Glyph>,
925L<Bio::Graphics::Feature>,
926L<Bio::Graphics::FeatureFile>
927
928=head1 AUTHOR
929
930Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
931
932Copyright (c) 2007 Cold Spring Harbor Laboratory
933
934This package and its accompanying libraries is free software; you can
935redistribute it and/or modify it under the terms of the GPL (either
936version 1, or at your option, any later version) or the Artistic
937License 2.0.  Refer to LICENSE for the full license text. In addition,
938please see DISCLAIMER.txt for disclaimers of warranty.
939
940=cut
941