1package Bio::Graphics::Glyph::topoview; 2 3# Based on the fb_shmiggle glyph by Victor Strelets, FlyBase.org 4# 2009-2010 Victor Strelets, FlyBase.org 5# Sheldon McKay <sheldon.mckay@gmail.com> 2015 6 7 8# "topoview.pm" the TopoView glyph was developed for fast 9# 3D-like demonstration of RNA-seq data consisting of multiple 10# individual subsets. The main purposes were to compact presentation 11# as much as possible (in one reasonably sized track) and 12# to allow easy visual detection of coordinated behavior 13# of the expression profiles of different subsets. 14 15# See http://gmod.org/wiki/Using_the_topoview_Glyph for complete documentation 16 17use strict; 18use GD; 19use base 'Bio::Graphics::Glyph::generic'; 20use Text::ParseWords 'shellwords'; 21use Data::Dumper; 22use List::Util qw/min max shuffle/; 23use constant DEBUG => 0; 24 25use vars qw/$colors_selected $black $red $white $grey @colors %Indices 26 $black $darkgrey $lightgrey $charcoal/; 27 28sub draw { 29 my $self = shift; 30 my $gd = shift; 31 my ($left,$top,$partno,$total_parts) = @_; 32 my $ft = $self->feature; 33 34 $black = $gd->colorClosest(0,0,0); 35 $lightgrey = $gd->colorClosest(225,225,225); 36 $grey = $gd->colorClosest(200,200,200); 37 $darkgrey = $gd->colorClosest(125,125,125); 38 $charcoal = $gd->colorClosest(75,75,75); 39 40 # User specified edge color or else charcoal gray (black is very harsh) 41 if (my $edge_color = $self->option('edge color')) { 42 my $col = $self->factory->translate_color($edge_color); 43 $self->{fgcolor} = $col; 44 } 45 else { 46 $self->{fgcolor} = $charcoal; 47 } 48 49 my($pnstart,$pnstop) = ($self->panel->start,$self->panel->end); # in seq coordinates 50 my($xf1,$yf1,$xf2,$yf2) = $self->calculate_boundaries($left,$top); 51 my $nseqpoints = $pnstop - $pnstart + 1; 52 my $leftpad = $self->panel->pad_left; 53 $ft->{datadir} ||= $self->option('datadir'); 54 my $datadir = $ENV{SERVER_PATH} . $ft->{datadir}; 55 56 my($start,$stop) = $self->panel->location2pixel(($ft->{start},$ft->{end})); 57 my $ftscrstop = $stop + $leftpad; 58 my $ftscrstart = $start + $leftpad; 59 60 my $chromosome = $ft->{ref}; 61 my $flipped = $self->{flip} ? 1 : 0; 62 my($subsets,$subsetsnames,$signals) = $self->getData($ft,$datadir,$chromosome,$pnstart,$pnstop,$xf1,$xf2,$flipped,$gd); 63 64 my $poly_pkg = $self->polygon_package; 65 66 my @orderedsubsets = @{$subsets}; 67 my $nsets = $#orderedsubsets+1; 68 69 # x and y steps 70 my $xstep = $self->option('x_step'); 71 my $ystep = $self->option('y_step'); 72 unless ($ystep) { 73 $ystep = int(100/$nsets); 74 $ystep = 7 if $ystep >= 7; # empiricaly found - to read lines of tiny fonts 75 $ystep = 12 if $ystep > 12; # empirically found - to preserve topo feel when number of subsets is small 76 } 77 unless ($xstep) { 78 $xstep = 4; 79 } 80 my($xw,$yw) = ( $nsets*$xstep, ($nsets-1)*$ystep ); 81 82 my $polybg = $poly_pkg->new(); 83 $polybg->addPt($xf1,$yf2-$yw); 84 $polybg->addPt($xf2,$yf2-$yw); 85 $polybg->addPt($xf2-$xw, $yf2); 86 $polybg->addPt($xf1-$xw, $yf2); 87 $gd->polygon($polybg,$lightgrey); # background 88 for( my $xx = $xf1+2; $xx<$xf2; $xx+=6 ) { $gd->line($xx,$yf2-$yw,$xx-$xw,$yf2,$grey); } # grid-helper 89 90 my $xshift = 0; 91 my $yshift = $nsets * $ystep; 92 93 my @screencoords = @{$signals->{screencoords}}; 94 my $max_signal = 30; 95 my $koeff = 4; 96 if( my $max = $self->max_score ) { 97 $max_signal = $max; 98 $koeff = 80.0/$max_signal; 99 } 100 my $predictor_cutoff = int($max_signal*0.95); # empirically found 101 my @prevx = (); 102 my @prevy = (); 103 my @prevvals = (); 104 my $profilen = $self->{no_max} ? 1 : 0; 105 my %SPEEDUP = (); 106 my $scrx = 0; 107 my $no_fill = defined $self->option('fill') && !$self->option('fill'); 108 109 foreach my $subset ( @orderedsubsets ) { 110 my ($color,$bgcolor,$edgecolor); 111 if ( $self->{bgcolor} ) { 112 $bgcolor = $self->{bgcolor}->{$subset}; 113 } 114 115 if ($profilen == 0) { 116 ($color,$edgecolor) = ($darkgrey,$charcoal); 117 } 118 else { 119 $color = $bgcolor; 120 $edgecolor = $self->{fgcolor}; 121 } 122 123 $xshift -= $xstep; 124 $yshift -= $ystep; 125 my @values = @{$signals->{$subset}}; 126 my($xold,$yold)= ($xf1+$xshift,$yf2-$yshift+1); 127 my $xpos = 0; 128 my $poly = $poly_pkg->new(); 129 $poly->addPt($xold,$yold+1); 130 my @allx = ($xold); 131 my @ally = ($yold); 132 my @allvals = (0); 133 my $runx = $xf1 + $xshift; 134 135 foreach my $val ( @values ) { 136 $scrx += $leftpad; 137 my $x = $screencoords[$xpos] + $xshift; 138 my $visval; 139 if( exists $SPEEDUP{$val} ) { $visval = $SPEEDUP{$val}; } 140 else { $visval = int($val*$koeff); $SPEEDUP{$val}= $visval; } 141 my $y = $yf2 - $yshift - $visval; 142 push(@allx,$x); 143 push(@ally,$y); 144 push(@allvals,$visval); 145 if( $xpos>0 ) { 146 $poly->addPt($x,$y+1); 147 } 148 ($xold,$yold)= ($x,$y); 149 $xpos++; 150 } 151 $poly->addPt($xf2+$xshift, $yf2-$yshift+1); 152 unless ($profilen == 0 || $no_fill) { 153 $gd->filledPolygon($poly,$color); 154 } 155 ($xold,$yold)= ($allx[0],$ally[0]); 156 for( my $en =1; $en<=$#allx; $en++ ) { 157 my $x = $allx[$en]; 158 my $y = $ally[$en]; 159 $gd->line($xold,$yold,$x,$y,$edgecolor); 160 ($xold,$yold)= ($x,$y); 161 } 162 163 # add scale bar to left and mid-point 164 if ($profilen == 1) { 165 my($xmin,$yyy,$ymax) = ($allx[1]-1,$yf2-$yw,$ally[0]); 166 my $xmid = int(($allx[-1] - $xmin)/2); 167 $self->add_scale_bar([$xmin,$xmid],$yyy,$max_signal,$gd); 168 $self->{_xmid} = $xmid; 169 $self->{_ymin} = $ymax; 170 } 171 172 $self->{_key}->{$subsetsnames->{$subset}} = $color; 173 174 $gd->string(GD::Font->Tiny,$xf2+$xshift+3, $yf2-$yshift-5,$subsetsnames->{$subset},$color); 175 176 unless( $profilen ==0 ) { @prevx = @allx; @prevy = @ally; @prevvals = @allvals; } 177 $profilen++; 178 } 179 180 my $hide_key = defined $self->option('show key') && !$self->option('show key'); 181 $self->add_subset_key($gd,$subsets) unless $hide_key; 182 183 $gd->flipVertical() if $self->option('flip vertical'); 184 185 return; 186} 187 188sub add_scale_bar { 189 my ($self,$xx,$y,$max_signal,$gd) = @_; 190 return if $self->option('flip vertical'); 191 for my $x (@$xx) { 192 $gd->string(GD::Font->Tiny,$x-12, $y-3,'0',$black); 193 $gd->line($x-2,$y,$x-2,$y-50,$black); 194 $gd->line($x-4,$y-47,$x-2,$y-50,$black); 195 $gd->line($x,$y-47,$x-2,$y-50,$black); 196 $gd->line($x-4,$y-44,$x-2,$y-44,$black); 197 $gd->string(GD::Font->Tiny,$x-18, $y-47,int($max_signal+0.5),$black); 198 } 199} 200 201sub add_subset_key { 202 my ($self,$gd,$subsets) = @_; 203 my @subsets = grep {!/MAX/} @$subsets; 204 return if $self->option('flip vertical'); 205 my $first_x = $self->{_xmid}-250; 206 my $first_y = 12; 207 my $key_colors = $self->{_key}; 208 my $font = GD::Font->MediumBold; 209 my $width = $self->width; 210 my $total_key_width = 18; 211 212 my ($longest_string) = sort {$b <=> $a} 213 map {$self->string_width($_,$font)} @subsets; 214 215 my $count; 216 my $x = $first_x; 217 my $y = $first_y; 218 219 my $cutoff = 100; 220 if (@subsets > 8 && !(@subsets %2)) { 221 $cutoff = @subsets/2 + 1; 222 } 223 elsif (@subsets > 8) { 224 $cutoff = int(@subsets/2 + 0.5); 225 } 226 227 for my $subset (@subsets) { 228 if (++$count == $cutoff) { 229 $x = $first_x; 230 $y = $first_y + 12; 231 } 232 my $color = $key_colors->{$subset}; 233 my $edgecolor = $self->{fgcolor}; 234 my $string_width = $self->string_width($subset,$font); 235 $gd->rectangle($x,$y,$x+10,$y+10,$edgecolor); 236 $gd->filledRectangle($x,$y,$x+10,$y+10,$color); 237 $x += 14; 238 $gd->string($font,$x,$y,$subset,$black); 239 $x += $longest_string + 8; 240 } 241} 242 243#-------------------------- 244sub getData { 245 my $self = shift; 246 my($ft,$datadir,$chromosome,$start,$stop,$scrstart,$scrstop,$flipped,$gd) = @_; 247 my $global_max_signal = $self->option('max_score') || 0; 248 my %Signals = (); 249 $self->openDataFiles($datadir); 250 251 my $subset_text = $self->option('subset order'); 252 if ($subset_text) { 253 my @words = shellwords($subset_text); 254 255 # subset + color 256 if (!(@words %2) && $words[1] =~ /^[0-9A-F]{6}$/ && $words[2] !~ /^[.0-9]+$/) { 257 while (@words) { 258 push @{$ft->{subsetsorder}}, [splice(@words,0,2)]; 259 } 260 } 261 # subset + color + alpha 262 elsif (!(@words %3) && $words[1] =~ /^[0-9A-F]{6}$/) { 263 while (@words) { 264 push @{$ft->{subsetsorder}}, [splice(@words,0,3)]; 265 } 266 } 267 # no color specified? Random color for you. Good luck! 268 else { 269 for my $word (@words) { 270 push @{$ft->{subsetsorder}}, [$word,$self->random_color()]; 271 } 272 } 273 274 } 275 276 my @subsets = (exists $ft->{'subsetsorder'}) ? @{$ft->{'subsetsorder'}} : sort split(/\t+/,$Indices{'subsets'}); 277 278 my $user_max = $self->option('max_score'); 279 280 # This bit of code reads in user-specified bgcolor, if provided 281 if ( ref $subsets[0] eq 'ARRAY' ) { 282 for (@subsets) { 283 next unless ref $_ eq 'ARRAY'; 284 my ($subset,$color,$alpha) = @$_; 285 $alpha ||= $self->option('fill opacity') || 1.0; 286 287 if ($alpha && $alpha > 1) { 288 die "Alpha must be between zero and 1"; 289 } 290 291 # make it hex if it looks like hex 292 if ((length $color == 6) && $color =~ /^[0-9A-F]+$/) { 293 $color = '#'.$color; 294 } 295 my $bgcolor = $self->factory->transparent_color($alpha,$color); 296 my $fgcolor = $self->translate_color($color); 297 $self->{bgcolor}->{$subset} = $bgcolor; 298 299 # We will re-use this array later 300 $_ = $subset; 301 } 302 } 303 304 shift(@subsets) if $subsets[0] eq 'MAX'; 305 warn("subsets: @subsets\n") if DEBUG; 306 307 my %SubsetsNames = (exists $ft->{'subsetsnames'}) ? %{$ft->{'subsetsnames'}} : map { $_, $_ } @subsets; 308 $SubsetsNames{MAX}= 'MAX'; 309 my $screenstep = ($scrstop-$scrstart+1) * 1.0 / ($stop-$start+1); 310 my $donecoords = 0; 311 my $local_max_signal = 0; 312 313 foreach my $subset ( @subsets ) { 314 my $nstrings = 0; 315 # scan seq ranges offsets to see where to start reading 316 my $key = $subset.':'.$chromosome; 317 my $poskey = $key.':offsets'; 318 my $ranges_pos = (exists $Indices{$poskey}) ? int($Indices{$poskey}) : -1; 319 if( $ranges_pos == -1 ) { next; } # no such signal.. 320 warn(" positioning for $poskey starts at $ranges_pos\n") if DEBUG; 321 if( $start>=1000000 ) { 322 my $bigstep = int($start/1000000.0); 323 if( exists $Indices{$key.':offsets:'.$bigstep} ) { 324 my $jumpval = $Indices{$key.':offsets:'.$bigstep}; 325 warn(" jump in offset search to $jumpval\n") if DEBUG; 326 $ranges_pos = int($jumpval); } 327 } 328 seek(DATF,$ranges_pos,0); 329 my($offset,$offset1)= (0,0); 330 my $lastseqloc = -999999999; 331 my $useoffset = 0; 332 while( (my $strs =<DATF>) ) { 333 $nstrings++ if DEBUG; 334 if( DEBUG ) { 335 chop($strs); warn(" positioning read for coord $start ($strs)\n"); } 336 last unless $strs =~m/^(-?\d+)[ \t]+(\d+)/; 337 my($seqloc,$fileoffset)= ($1,$2); 338 if( DEBUG ) { 339 chop($strs); warn(" positioning read for $poskey => $seqloc, $fileoffset ($strs)\n"); } 340 $offset1 = $offset; 341 $offset = $fileoffset; 342 $lastseqloc = $seqloc; 343 if( $seqloc > $start ) { $useoffset = int($offset1); last; } 344 } 345 warn(" will use offset $useoffset\n") if DEBUG; 346 warn(" (scanned $nstrings offset strings)\n") if DEBUG; 347 if( $useoffset ==0 ) { # data offset cannot be 0 - means didn't find where to read required data.. 348 next; 349 my @emptyvals = (); 350 for( my $ii = $scrstart; $ii++ <= $scrstop; ) { push(@emptyvals,0); } 351 $Signals{$subset}= \@emptyvals; 352 } 353 $nstrings = 0; 354 # read signal profile 355 seek(DATF,$useoffset,0); 356 $lastseqloc = -999999999; 357 my $lastsignal = 0; 358 my($scrx,$scrxold)= ($scrstart,$scrstart-1); 359 my $runmax = 0; 360 my @values = (); 361 my @xscreencoords = (); 362 363 while( (my $str =<DATF>) ) { 364 $nstrings++ if DEBUG; 365 unless( $str =~m/^(-?\d+)[ \t]+(\d+)/ ) { 366 warn(" header read: $str") if DEBUG; 367 last; # because no headers were indexed at the beginning of data packs 368 } 369 my($seqloc,$signal)= ($1,$2); 370 my $real_signal = $signal; 371 $signal = $user_max if $user_max && $signal > $user_max; 372 $local_max_signal = $signal if $signal > $local_max_signal; 373 374 warn(" signal read: $seqloc, $signal line: $str") if DEBUG; 375 last if $lastseqloc > $seqloc; # just in case, as all sits merged in one file.. 376 if( $seqloc>=$start ) { # current is the next one after the one we need to start from.. 377 unless( $lastseqloc == -999999999 ) { # expand previous 378 $lastseqloc = $start-2 if $lastseqloc<$start; # limit empty steps (they may start from -200000) 379 while( $lastseqloc < $seqloc ) { # until another (one we just retrieved) wiggle reading 380 last if $lastseqloc > $stop; # end of subset data 381 next if $lastseqloc++ < $start; 382 # we have actual new seq position in our required range 383 my $scrpos = int($scrx); 384 $runmax = $lastsignal if $runmax < $lastsignal; 385 if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position 386 push(@values,$runmax); 387 push(@xscreencoords,$scrpos) unless $donecoords; 388 #print STDERR Dumper \@xscreencoords unless $donecoords; 389 $scrxold = $scrpos; 390 $runmax = 0; 391 } 392 $scrx += $screenstep; # remember - it is not integer 393 } 394 } 395 } 396 ($lastseqloc,$lastsignal)= ($seqloc,$signal); 397 last if $seqloc > $stop; # end of subset data 398 } 399 if( $lastseqloc < $stop ) { # if on the end of signal profile, but still in screen range 400 # just assume that we are getting one more reading with signal == 0 401 my $signal = 0; 402 while( $lastseqloc++ < $stop ) { 403 my $scrpos = int($scrx); 404 if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position 405 push(@values,$signal); 406 push(@xscreencoords,$scrpos) unless $donecoords; 407 $scrxold = $scrpos; 408 } 409 $scrx += $screenstep; 410 } 411 } 412 warn(" (scanned $nstrings signal strings)\n") if DEBUG; 413 $nstrings = 0; 414 if( $flipped ) { 415 my @ch = reverse @values; @values = @ch; 416 } 417 warn(" ".$subset."=> ".@values." values @values\n") if DEBUG && $#values<1000; 418 $Signals{$subset}= \@values; 419 $Signals{screencoords}= \@xscreencoords unless $donecoords; 420 $donecoords = 1; 421 } # foreach my $subset ( @subsets ) { 422 423 # scaling can be local, user-defined max or global max 424 my $scale = $self->option('autoscale') || 'global'; 425 if ($scale eq 'local' && !$user_max) { 426 $self->max_score($local_max_signal); 427 } 428 else { 429 $self->max_score($user_max || $Indices{max_signal}); 430 } 431 432 warn(" max_signal => ".$self->max_score." \n") if DEBUG; 433 434 # prepare MAX profile - will be used as a base for exon/UTR prediction 435 $self->{no_max} = defined $self->option('show max') && ! $self->option('show max'); 436 unless ($self->{no_max}) { 437 my @maxprofile = (); 438 my @ruler = @{$Signals{screencoords}}; 439 for( my $npos = 0; $npos<=$#ruler; $npos++ ) { 440 my $maxval = 0; 441 foreach my $subset ( @subsets ) { 442 my $p = $Signals{$subset}; 443 my $val = $p->[$npos]; 444 $maxval = $val if $maxval < $val; 445 } 446 push(@maxprofile,$maxval); 447 } 448 $Signals{MAX}= \@maxprofile; 449 warn(" MAX => ".@maxprofile." values @maxprofile\n") if DEBUG && $#maxprofile<1000; 450 unshift(@subsets,'MAX'); 451 } 452 453 return(\@subsets,\%SubsetsNames, \%Signals); 454} 455 456#-------------------------- 457sub openDataFiles { 458 my $self = shift; 459 my $datadir = shift; 460 $datadir.= '/' unless $datadir =~m|/$|; 461 my $datafile = $datadir.'data.cat'; 462 open(DATF,$datafile) || die("cannot open $datafile\n"); 463 use BerkeleyDB; # caller should already used proper 'use lib' command with path 464 my $bdbfile = $datadir . 'index.bdbhash'; 465 tie %Indices, "BerkeleyDB::Hash", -Filename => $bdbfile, -Flags => DB_RDONLY || warn("can't read BDBHash $bdbfile\n"); 466 if( DEBUG ) { foreach my $kk ( sort keys %Indices ) { warn(" $kk => ".$Indices{$kk}."\n"); } } 467 return; 468} 469 470sub min_score { 471 # not implemented 472} 473 474sub max_score { 475 my $self = shift; 476 my $score = shift; 477 $self->{max_score} ||= $score; 478 return $self->{max_score}; 479} 480 481sub random_color { 482 my $self = shift; 483 my @nums = 0..9,'A'..'F'; 484 my $color; 485 for (0..5) { 486 my @array = shuffle(@nums); 487 my $char = shift @array; 488 $color .= $char; 489 } 490 return $color; 491} 492 493 4941; 495 496 497