1package Bio::Graphics::Glyph::topoview;
2
3# Based on the fb_shmiggle glyph by Victor Strelets, FlyBase.org
4# 2009-2010 Victor Strelets, FlyBase.org
5# Sheldon McKay <sheldon.mckay@gmail.com> 2015
6
7
8# "topoview.pm" the TopoView glyph was developed for fast
9# 3D-like demonstration of RNA-seq data consisting of multiple
10# individual subsets. The main purposes were to compact presentation
11# as much as possible (in one reasonably sized track) and
12# to allow easy visual detection of coordinated behavior
13# of the expression profiles of different subsets.
14
15# See http://gmod.org/wiki/Using_the_topoview_Glyph for complete documentation
16
17use strict;
18use GD;
19use base 'Bio::Graphics::Glyph::generic';
20use Text::ParseWords 'shellwords';
21use Data::Dumper;
22use List::Util qw/min max shuffle/;
23use constant DEBUG => 0;
24
25use vars qw/$colors_selected $black $red $white $grey @colors %Indices
26            $black $darkgrey $lightgrey $charcoal/;
27
28sub draw {
29    my $self = shift;
30    my $gd   = shift;
31    my ($left,$top,$partno,$total_parts) = @_;
32    my $ft   = $self->feature;
33
34    $black     = $gd->colorClosest(0,0,0);
35    $lightgrey = $gd->colorClosest(225,225,225);
36    $grey      = $gd->colorClosest(200,200,200);
37    $darkgrey  = $gd->colorClosest(125,125,125);
38    $charcoal  = $gd->colorClosest(75,75,75);
39
40    # User specified edge color or else charcoal gray (black is very harsh)
41    if (my $edge_color = $self->option('edge color')) {
42	my $col = $self->factory->translate_color($edge_color);
43	$self->{fgcolor} = $col;
44    }
45    else {
46	$self->{fgcolor} = $charcoal;
47    }
48
49    my($pnstart,$pnstop)    = ($self->panel->start,$self->panel->end); # in seq coordinates
50    my($xf1,$yf1,$xf2,$yf2) = $self->calculate_boundaries($left,$top);
51    my $nseqpoints = $pnstop - $pnstart + 1;
52    my $leftpad    = $self->panel->pad_left;
53    $ft->{datadir} ||= $self->option('datadir');
54    my $datadir    = $ENV{SERVER_PATH} . $ft->{datadir};
55
56    my($start,$stop) = $self->panel->location2pixel(($ft->{start},$ft->{end}));
57    my $ftscrstop    = $stop + $leftpad;
58    my $ftscrstart   = $start + $leftpad;
59
60    my $chromosome = $ft->{ref};
61    my $flipped    = $self->{flip} ? 1 : 0;
62    my($subsets,$subsetsnames,$signals) = $self->getData($ft,$datadir,$chromosome,$pnstart,$pnstop,$xf1,$xf2,$flipped,$gd);
63
64    my $poly_pkg = $self->polygon_package;
65
66    my @orderedsubsets = @{$subsets};
67    my $nsets          = $#orderedsubsets+1;
68
69    # x and y steps
70    my $xstep = $self->option('x_step');
71    my $ystep = $self->option('y_step');
72    unless ($ystep) {
73	$ystep = int(100/$nsets);
74	$ystep = 7  if $ystep >= 7; # empiricaly found - to read lines of tiny fonts
75	$ystep = 12 if $ystep > 12; # empirically found - to preserve topo feel when number of subsets is small
76    }
77    unless ($xstep) {
78	$xstep = 4;
79    }
80    my($xw,$yw) = ( $nsets*$xstep, ($nsets-1)*$ystep );
81
82    my $polybg = $poly_pkg->new();
83    $polybg->addPt($xf1,$yf2-$yw);
84    $polybg->addPt($xf2,$yf2-$yw);
85    $polybg->addPt($xf2-$xw, $yf2);
86    $polybg->addPt($xf1-$xw, $yf2);
87    $gd->polygon($polybg,$lightgrey); # background
88    for( my $xx = $xf1+2; $xx<$xf2; $xx+=6 ) { $gd->line($xx,$yf2-$yw,$xx-$xw,$yf2,$grey); } # grid-helper
89
90    my $xshift = 0;
91    my $yshift = $nsets * $ystep;
92
93    my @screencoords = @{$signals->{screencoords}};
94    my $max_signal = 30;
95    my $koeff = 4;
96    if( my $max = $self->max_score ) {
97	$max_signal = $max;
98	$koeff = 80.0/$max_signal;
99    }
100    my $predictor_cutoff = int($max_signal*0.95); # empirically found
101    my @prevx = ();
102    my @prevy = ();
103    my @prevvals = ();
104    my $profilen = $self->{no_max} ? 1 : 0;
105    my %SPEEDUP = ();
106    my $scrx = 0;
107    my $no_fill = defined $self->option('fill') && !$self->option('fill');
108
109    foreach my $subset ( @orderedsubsets ) {
110	my ($color,$bgcolor,$edgecolor);
111	if ( $self->{bgcolor} ) {
112	    $bgcolor = $self->{bgcolor}->{$subset};
113	}
114
115	if ($profilen == 0) {
116	    ($color,$edgecolor) = ($darkgrey,$charcoal);
117	}
118	else {
119	    $color = $bgcolor;
120	    $edgecolor = $self->{fgcolor};
121	}
122
123	$xshift -= $xstep;
124	$yshift -= $ystep;
125	my @values = @{$signals->{$subset}};
126	my($xold,$yold)= ($xf1+$xshift,$yf2-$yshift+1);
127	my $xpos = 0;
128	my $poly = $poly_pkg->new();
129	$poly->addPt($xold,$yold+1);
130	my @allx = ($xold);
131	my @ally = ($yold);
132	my @allvals = (0);
133	my $runx = $xf1 + $xshift;
134
135	foreach my $val ( @values ) {
136	    $scrx += $leftpad;
137	    my $x =  $screencoords[$xpos] + $xshift;
138	    my $visval;
139	    if( exists $SPEEDUP{$val} ) { $visval = $SPEEDUP{$val}; }
140	    else { $visval = int($val*$koeff); $SPEEDUP{$val}= $visval; }
141	    my $y = $yf2 - $yshift - $visval;
142	    push(@allx,$x);
143	    push(@ally,$y);
144	    push(@allvals,$visval);
145	    if( $xpos>0 ) {
146		$poly->addPt($x,$y+1);
147	    }
148	    ($xold,$yold)= ($x,$y);
149	    $xpos++;
150	}
151	$poly->addPt($xf2+$xshift, $yf2-$yshift+1);
152	unless ($profilen == 0 || $no_fill) {
153	    $gd->filledPolygon($poly,$color);
154	}
155	($xold,$yold)= ($allx[0],$ally[0]);
156	for( my $en =1; $en<=$#allx; $en++ ) {
157	    my $x = $allx[$en];
158	    my $y = $ally[$en];
159	    $gd->line($xold,$yold,$x,$y,$edgecolor);
160	    ($xold,$yold)= ($x,$y);
161	}
162
163	# add scale bar to left and mid-point
164	if ($profilen == 1) {
165	    my($xmin,$yyy,$ymax) = ($allx[1]-1,$yf2-$yw,$ally[0]);
166	    my $xmid = int(($allx[-1] - $xmin)/2);
167	    $self->add_scale_bar([$xmin,$xmid],$yyy,$max_signal,$gd);
168	    $self->{_xmid} = $xmid;
169	    $self->{_ymin} = $ymax;
170	}
171
172	$self->{_key}->{$subsetsnames->{$subset}} = $color;
173
174	$gd->string(GD::Font->Tiny,$xf2+$xshift+3, $yf2-$yshift-5,$subsetsnames->{$subset},$color);
175
176	unless( $profilen ==0 ) { @prevx = @allx; @prevy = @ally; @prevvals = @allvals; }
177	$profilen++;
178    }
179
180    my $hide_key = defined $self->option('show key') && !$self->option('show key');
181    $self->add_subset_key($gd,$subsets) unless $hide_key;
182
183    $gd->flipVertical() if $self->option('flip vertical');
184
185    return;
186}
187
188sub add_scale_bar {
189    my ($self,$xx,$y,$max_signal,$gd) = @_;
190    return if $self->option('flip vertical');
191    for my $x (@$xx) {
192	$gd->string(GD::Font->Tiny,$x-12, $y-3,'0',$black);
193	$gd->line($x-2,$y,$x-2,$y-50,$black);
194	$gd->line($x-4,$y-47,$x-2,$y-50,$black);
195	$gd->line($x,$y-47,$x-2,$y-50,$black);
196	$gd->line($x-4,$y-44,$x-2,$y-44,$black);
197	$gd->string(GD::Font->Tiny,$x-18, $y-47,int($max_signal+0.5),$black);
198    }
199}
200
201sub add_subset_key {
202    my ($self,$gd,$subsets) = @_;
203    my @subsets = grep {!/MAX/} @$subsets;
204    return if $self->option('flip vertical');
205    my $first_x = $self->{_xmid}-250;
206    my $first_y = 12;
207    my $key_colors  = $self->{_key};
208    my $font = GD::Font->MediumBold;
209    my $width = $self->width;
210    my $total_key_width = 18;
211
212    my ($longest_string) = sort {$b <=> $a}
213    map  {$self->string_width($_,$font)} @subsets;
214
215    my $count;
216    my $x = $first_x;
217    my $y = $first_y;
218
219    my $cutoff = 100;
220    if (@subsets > 8 && !(@subsets %2)) {
221        $cutoff = @subsets/2 + 1;
222    }
223    elsif (@subsets > 8) {
224        $cutoff = int(@subsets/2 + 0.5);
225    }
226
227    for my $subset (@subsets) {
228        if (++$count == $cutoff) {
229            $x = $first_x;
230            $y = $first_y + 12;
231        }
232        my $color = $key_colors->{$subset};
233        my $edgecolor = $self->{fgcolor};
234        my $string_width = $self->string_width($subset,$font);
235        $gd->rectangle($x,$y,$x+10,$y+10,$edgecolor);
236        $gd->filledRectangle($x,$y,$x+10,$y+10,$color);
237        $x += 14;
238        $gd->string($font,$x,$y,$subset,$black);
239        $x += $longest_string + 8;
240    }
241}
242
243#--------------------------
244sub getData {
245    my $self = shift;
246    my($ft,$datadir,$chromosome,$start,$stop,$scrstart,$scrstop,$flipped,$gd) = @_;
247    my $global_max_signal = $self->option('max_score') || 0;
248    my %Signals = ();
249    $self->openDataFiles($datadir);
250
251    my $subset_text = $self->option('subset order');
252    if ($subset_text) {
253	my @words = shellwords($subset_text);
254
255	# subset + color
256	if (!(@words %2) && $words[1] =~ /^[0-9A-F]{6}$/ && $words[2] !~ /^[.0-9]+$/) {
257	    while (@words) {
258		push @{$ft->{subsetsorder}}, [splice(@words,0,2)];
259	    }
260	}
261	# subset + color + alpha
262	elsif (!(@words %3) && $words[1] =~ /^[0-9A-F]{6}$/) {
263            while (@words) {
264                push @{$ft->{subsetsorder}}, [splice(@words,0,3)];
265            }
266        }
267	# no color specified? Random color for you. Good luck!
268	else {
269	    for my $word (@words) {
270		push @{$ft->{subsetsorder}}, [$word,$self->random_color()];
271	    }
272	}
273
274    }
275
276    my @subsets = (exists $ft->{'subsetsorder'}) ? @{$ft->{'subsetsorder'}} : sort split(/\t+/,$Indices{'subsets'});
277
278    my $user_max = $self->option('max_score');
279
280    # This bit of code reads in user-specified bgcolor, if provided
281    if ( ref $subsets[0] eq 'ARRAY' ) {
282	for (@subsets) {
283	    next unless ref $_ eq 'ARRAY';
284	    my ($subset,$color,$alpha)  = @$_;
285	    $alpha ||= $self->option('fill opacity') || 1.0;
286
287	    if ($alpha && $alpha > 1) {
288		die "Alpha must be between zero and 1";
289	    }
290
291	    # make it hex if it looks like hex
292	    if ((length $color == 6) && $color =~ /^[0-9A-F]+$/) {
293		$color = '#'.$color;
294	    }
295	    my $bgcolor = $self->factory->transparent_color($alpha,$color);
296	    my $fgcolor = $self->translate_color($color);
297	    $self->{bgcolor}->{$subset} = $bgcolor;
298
299	    # We will re-use this array later
300	    $_ = $subset;
301	}
302    }
303
304    shift(@subsets) if $subsets[0] eq 'MAX';
305    warn("subsets: @subsets\n") if DEBUG;
306
307    my %SubsetsNames = (exists $ft->{'subsetsnames'}) ? %{$ft->{'subsetsnames'}} : map { $_, $_ } @subsets;
308    $SubsetsNames{MAX}= 'MAX';
309    my $screenstep = ($scrstop-$scrstart+1) * 1.0 / ($stop-$start+1);
310    my $donecoords = 0;
311    my $local_max_signal = 0;
312
313    foreach my $subset ( @subsets ) {
314	my $nstrings = 0;
315	# scan seq ranges offsets to see where to start reading
316	my $key = $subset.':'.$chromosome;
317	my $poskey = $key.':offsets';
318	my $ranges_pos = (exists $Indices{$poskey}) ? int($Indices{$poskey}) : -1;
319	if( $ranges_pos == -1 ) { next; } # no such signal..
320	warn("  positioning for $poskey starts at $ranges_pos\n") if DEBUG;
321	if( $start>=1000000 ) {
322	    my $bigstep = int($start/1000000.0);
323	    if( exists $Indices{$key.':offsets:'.$bigstep} ) {
324		my $jumpval = $Indices{$key.':offsets:'.$bigstep};
325		warn("  jump in offset search to $jumpval\n") if DEBUG;
326		$ranges_pos = int($jumpval); }
327	}
328	seek(DATF,$ranges_pos,0);
329	my($offset,$offset1)= (0,0);
330	my $lastseqloc = -999999999;
331	my $useoffset = 0;
332	while( (my $strs =<DATF>) ) {
333	    $nstrings++ if DEBUG;
334	    if( DEBUG ) {
335		chop($strs); warn("  	positioning read for coord $start ($strs)\n"); }
336	    last unless $strs =~m/^(-?\d+)[ \t]+(\d+)/;
337	    my($seqloc,$fileoffset)= ($1,$2);
338	    if( DEBUG ) {
339		chop($strs); warn("  positioning read for $poskey => $seqloc, $fileoffset ($strs)\n"); }
340	    $offset1 = $offset;
341	    $offset = $fileoffset;
342	    $lastseqloc = $seqloc;
343	    if( $seqloc > $start ) { $useoffset = int($offset1); last; }
344	}
345	warn("  will use offset $useoffset\n") if DEBUG;
346	warn("  	(scanned $nstrings offset strings)\n") if DEBUG;
347	if( $useoffset ==0 ) { # data offset cannot be 0 - means didn't find where to read required data..
348	    next;
349	    my @emptyvals = ();
350	    for( my $ii = $scrstart; $ii++ <= $scrstop; ) { push(@emptyvals,0); }
351	    $Signals{$subset}= \@emptyvals;
352	}
353	$nstrings = 0;
354	# read signal profile
355	seek(DATF,$useoffset,0);
356	$lastseqloc = -999999999;
357	my $lastsignal = 0;
358	my($scrx,$scrxold)= ($scrstart,$scrstart-1);
359	my $runmax = 0;
360	my @values = ();
361	my @xscreencoords = ();
362
363	while( (my $str =<DATF>) ) {
364	    $nstrings++ if DEBUG;
365	    unless( $str =~m/^(-?\d+)[ \t]+(\d+)/ ) {
366		warn("  header read: $str") if DEBUG;
367		last; # because no headers were indexed at the beginning of data packs
368	    }
369	    my($seqloc,$signal)= ($1,$2);
370	    my $real_signal = $signal;
371	    $signal = $user_max if $user_max && $signal > $user_max;
372	    $local_max_signal = $signal if $signal > $local_max_signal;
373
374	    warn("  signal read: $seqloc, $signal 		line: $str") if DEBUG;
375	    last if $lastseqloc > $seqloc; # just in case, as all sits merged in one file..
376	    if( $seqloc>=$start ) { # current is the next one after the one we need to start from..
377		unless( $lastseqloc == -999999999 ) { # expand previous
378		    $lastseqloc = $start-2 if $lastseqloc<$start; # limit empty steps (they may start from -200000)
379		    while( $lastseqloc < $seqloc ) { # until another (one we just retrieved) wiggle reading
380			last if $lastseqloc > $stop; # end of subset data
381			next if $lastseqloc++ < $start;
382			# we have actual new seq position in our required range
383			my $scrpos = int($scrx);
384			$runmax = $lastsignal if $runmax < $lastsignal;
385			if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position
386			    push(@values,$runmax);
387			    push(@xscreencoords,$scrpos) unless $donecoords;
388			    #print STDERR Dumper \@xscreencoords unless $donecoords;
389			    $scrxold = $scrpos;
390			    $runmax = 0;
391			}
392			$scrx += $screenstep; # remember - it is not integer
393		    }
394		}
395	    }
396	    ($lastseqloc,$lastsignal)= ($seqloc,$signal);
397	    last if $seqloc > $stop; # end of subset data
398	}
399	if( $lastseqloc < $stop ) { # if on the end of signal profile, but still in screen range
400	    # just assume that we are getting one more reading with signal == 0
401	    my $signal = 0;
402	    while( $lastseqloc++ < $stop ) {
403		my $scrpos = int($scrx);
404		if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position
405		    push(@values,$signal);
406		    push(@xscreencoords,$scrpos) unless $donecoords;
407		    $scrxold = $scrpos;
408		}
409		$scrx += $screenstep;
410	    }
411	}
412	warn("  	(scanned $nstrings signal strings)\n") if DEBUG;
413	$nstrings = 0;
414	if( $flipped ) {
415	    my @ch = reverse @values; @values = @ch;
416	}
417	warn("  ".$subset."=> ".@values." values @values\n") if DEBUG && $#values<1000;
418	$Signals{$subset}= \@values;
419	$Signals{screencoords}= \@xscreencoords unless $donecoords;
420	$donecoords = 1;
421    } # foreach my $subset ( @subsets ) {
422
423    # scaling can be local, user-defined max or global max
424    my $scale    = $self->option('autoscale') || 'global';
425    if ($scale eq 'local' && !$user_max) {
426	$self->max_score($local_max_signal);
427    }
428    else {
429	$self->max_score($user_max || $Indices{max_signal});
430    }
431
432    warn("  max_signal => ".$self->max_score." \n") if DEBUG;
433
434    # prepare MAX profile - will be used as a base for exon/UTR prediction
435    $self->{no_max} = defined $self->option('show max') && ! $self->option('show max');
436    unless ($self->{no_max}) {
437	my @maxprofile = ();
438	my @ruler = @{$Signals{screencoords}};
439	for( my $npos = 0; $npos<=$#ruler; $npos++ ) {
440	    my $maxval = 0;
441	    foreach my $subset ( @subsets ) {
442		my $p = $Signals{$subset};
443		my $val = $p->[$npos];
444		$maxval = $val if $maxval < $val;
445	    }
446	    push(@maxprofile,$maxval);
447	}
448	$Signals{MAX}= \@maxprofile;
449	warn("  MAX => ".@maxprofile." values @maxprofile\n") if DEBUG && $#maxprofile<1000;
450	unshift(@subsets,'MAX');
451    }
452
453    return(\@subsets,\%SubsetsNames, \%Signals);
454}
455
456#--------------------------
457sub openDataFiles {
458    my $self = shift;
459    my $datadir = shift;
460    $datadir.= '/' unless $datadir =~m|/$|;
461    my $datafile = $datadir.'data.cat';
462    open(DATF,$datafile) || die("cannot open $datafile\n");
463    use BerkeleyDB; # caller should already used proper 'use lib' command with path
464    my $bdbfile = $datadir . 'index.bdbhash';
465    tie %Indices, "BerkeleyDB::Hash", -Filename => $bdbfile, -Flags => DB_RDONLY || warn("can't read BDBHash $bdbfile\n");
466    if( DEBUG ) { foreach my $kk ( sort keys %Indices ) { warn("	$kk => ".$Indices{$kk}."\n"); } }
467    return;
468}
469
470sub min_score {
471    # not implemented
472}
473
474sub max_score {
475    my $self  = shift;
476    my $score = shift;
477    $self->{max_score} ||= $score;
478    return $self->{max_score};
479}
480
481sub random_color {
482    my $self = shift;
483    my @nums = 0..9,'A'..'F';
484    my $color;
485    for (0..5) {
486	my @array = shuffle(@nums);
487	my $char  = shift @array;
488	$color .= $char;
489    }
490    return $color;
491}
492
493
4941;
495
496
497