1# BioPerl module for Bio::Graphics::Pictogram 2# 3# Cared for by Shawn Hoon <shawnh@fugu-sg.org> 4# 5# Copyright Shawn Hoon 6# 7# You may distribute this module under the same terms as perl itself 8 9# POD documentation - main docs before the code 10 11=head1 NAME 12 13Bio::Graphics::Pictogram - generate SVG output of Pictogram display for consensus motifs 14 15=head1 SYNOPSIS 16 17 use Bio::Graphics::Pictogram; 18 use Bio::SeqIO; 19 20 my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta'); 21 my @seq; 22 while(my $seq = $sio->next_seq){ 23 push @seq, $seq; 24 } 25 26 my $picto = Bio::Graphics::Pictogram->new(-width=>"800", 27 -height=>"500", 28 -fontsize=>"60", 29 -plot_bits=>1, 30 -background=>{ 31 'A'=>0.25, 32 'C'=>0.18, 33 'T'=>0.32, 34 'G'=>0.25}, 35 -color=>{'A'=>'red', 36 'G'=>'blue', 37 'C'=>'green', 38 'T'=>'magenta'}); 39 40 my $svg = $picto->make_svg(\@seq); 41 42 print $svg->xmlify."\n"; 43 44 #Support for Bio::Matrix::PSM::SiteMatrix now included 45 46 use Bio::Matrix::PSM::IO; 47 48 my $picto = Bio::Graphics::Pictogram->new(-width=>"800", 49 -height=>"500", 50 -fontsize=>"60", 51 -plot_bits=>1, 52 -background=>{ 53 'A'=>0.25, 54 'C'=>0.18, 55 'T'=>0.32, 56 'G'=>0.25}, 57 -color=>{'A'=>'red', 58 'G'=>'blue', 59 'C'=>'green', 60 'T'=>'magenta'}); 61 62 my $psm = $psmIO->next_psm; 63 my $svg = $picto->make_svg($psm); 64 print $svg->xmlify; 65 66 67=head1 DESCRIPTION 68 69A module for generating SVG output of Pictogram display for consensus 70motifs. This method of representation was describe by Burge and 71colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II, 72525-560, CSHL press, 1999) 73 74This is a simple module that takes in an array of sequences (assuming 75equal lengths) and calculates relative base frequencies where the 76height of each letter reflects the frequency of each nucleotide at a 77given position. It can also plot the information content at each 78position scaled by the background frequencies of each nucleotide. 79 80It requires the SVG-2.26 or later module by Ronan Oger available at 81http://www.cpan.org 82 83Recommended viewing of the SVG is the plugin available at Adobe: 84http://www.adobe.com/svg 85 86=head1 FEEDBACK 87 88 89=head2 Mailing Lists 90 91User feedback is an integral part of the evolution of this and other 92Bioperl modules. Send your comments and suggestions preferably to one 93of the Bioperl mailing lists. Your participation is much appreciated. 94 95 bioperl-l@bioperl.org - General discussion 96 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 97 98=head2 Reporting Bugs 99 100Report bugs to the Bioperl bug tracking system to help us keep track 101the bugs and their resolution. Bug reports can be submitted via the 102web: 103 104 http://bugzilla.open-bio.org/ 105 106=head1 AUTHOR - Shawn Hoon 107 108Email shawnh@fugu-sg.org 109 110 111=head1 APPENDIX 112 113 114The rest of the documentation details each of the object 115methods. Internal methods are usually preceded with a "_". 116 117=cut 118 119package Bio::Graphics::Pictogram; 120use strict; 121use SVG 2.26; 122use Bio::SeqIO; 123use base qw(Bio::Root::Root); 124 125use constant MAXBITS => 2; 126 127=head2 new 128 129 Title : new 130 Usage : my $picto = Bio::Graphics::Pictogram->new(-width=>"800", 131 -height=>"500", 132 -fontsize=>"60", 133 -plot_bits=>1, 134 -background=>{ 135 'A'=>0.25, 136 'C'=>0.18, 137 'T'=>0.32, 138 'G'=>0.25}, 139 -color=>{'A'=>'red', 140 'G'=>'blue', 141 'C'=>'green', 142 'T'=>'magenta'}); 143 Function: Constructor for Pictogram Object 144 Returns : L<Bio::Graphics::Pictogram> 145 146=cut 147 148sub new { 149 my ($caller,@args) = @_; 150 my $self = $caller->SUPER::new(@args); 151 my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args); 152 $width||=800; 153 $height||=600; 154 my $svg = SVG->new(width=>$width,height=>$height); 155 $self->svg_obj($svg); 156 $fontsize ||= 80; 157 $self->fontsize($fontsize) if $fontsize; 158 $color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'}; 159 $self->color($color); 160 $background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25}; 161 $self->background($background); 162 $self->plot_bits($bit) if $bit; 163 $self->normalize($normalize) if $normalize; 164 165 return $self; 166} 167 168=head2 make_svg 169 170 Title : make_svg 171 Usage : $picto->make_svg(); 172 Function: make the SVG object 173 Returns : L<SVG> 174 Arguments: A fasta file or array ref of L<Bio::Seq> objects or a L<Bio::Matrix::PSM::SiteMatrixI> 175 176=cut 177 178sub make_svg { 179 my ($self,$input) = @_; 180 my $fontsize = $self->fontsize; 181 my $size = $fontsize * 0.75; 182 my $width= $size; 183 my $height= $size+40; 184 my $color = $self->color; 185 186 #starting x coordinate for pictogram 187 my $x = 45+$size/2; 188 my $pos_y = $size * 2; 189 my $bit_y = $pos_y+40; 190 my @pwm; 191 192 my $bp = 1; 193 194 #input can be file or array ref of sequences 195 if(ref($input) eq 'ARRAY'){ 196 @pwm = @{$self->_make_pwm($input)}; 197 } 198 elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){ 199 @pwm = $self->_make_pwm_from_site_matrix($input); 200 } 201 else { 202 my $sio = Bio::SeqIO->new(-file=>$input,-format=>"fasta"); 203 my @seq; 204 while (my $seq = $sio->next_seq){ 205 push @seq, $seq; 206 } 207 @pwm = @{$self->_make_pwm(\@seq)}; 208 } 209 210 211 my $svg = $self->svg_obj; 212 my $seq_length = scalar(@pwm + 1) * $width + $x + $x; 213 my $seq_grp; 214 215 #scale the svg if length greater than svg width 216 if($seq_length > $svg->{-document}->{'width'}){ 217 my $ratio = $svg->{-document}->{'width'}/($seq_length); 218 $seq_grp = $svg->group(transform=>"scale($ratio,1)"); 219 } 220 else { 221 $seq_grp= $svg->group(); 222 } 223 224 #do the drawing, each set is a base position 225 foreach my $set(@pwm){ 226 my ($A,$C,$G,$T,$bits) = @$set; 227 my @array; 228 push @array, ['a',($A)]; 229 push @array, ['g',($G)]; 230 push @array, ['c',($C)]; 231 push @array, ['t',($T)]; 232 @array = sort {$b->[1]<=>$a->[1]}@array; 233 my $count = 1; 234 my $pos_group = $seq_grp->group(id=>"bp $bp"); 235 my $prev_size; 236 my $y_trans; 237 238 #draw each letter at each position 239 foreach my $letter(@array){ 240 my $scale; 241 if($self->normalize){ 242 $scale = $letter->[1]; 243 } else { 244 $scale = $letter->[1] * ($bits / MAXBITS); 245 } 246 247 if($count == 1){ 248 if($self->normalize){ 249 $y_trans = 0; 250 } else { 251 $y_trans = (1 - ($bits / MAXBITS)) * $size; 252 } 253 } 254 else { 255 $y_trans += $prev_size; 256 } 257 $pos_group->text('id'=> uc($letter->[0]).$bp,height=>$height, 258 'width'=>$width,x=>$x,y=>$size, 259 'transform'=>"translate(0,$y_trans),scale(1,$scale)", 260 'style'=>{"font-size"=>$fontsize, 261 'text-anchor'=>'middle', 262 'font-family'=>'Verdana', 263 'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0; 264 265 $prev_size = $scale * $size; 266 $count++; 267 } 268 #plot the bit if required 269 if($self->plot_bits){ 270 $seq_grp->text('x'=>$x, 271 'y'=>$bit_y, 272 'style'=>{"font-size"=>'10', 273 'text-anchor'=>'middle', 274 'font-family'=>'Verdana', 275 'fill'=>'black'})->cdata($bits); 276 } 277 $bp++; 278 $x+=$width; 279 } 280 281 #plot the tags 282 $seq_grp->text(x=>int($width/2),y=>$bit_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits; 283 284 $seq_grp->text(x=>int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:"); 285 286 #plot the base positions 287 $x = 45+$size/2-int($width/2); 288 foreach my $nbr(1..($bp-1)){ 289 $seq_grp->text(x=>$x+int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr); 290 $x+=$width; 291 } 292 293 294# $seq_grp->transform("scale(2,2)"); 295 296 return $self->svg_obj($svg); 297} 298 299sub _make_pwm_from_site_matrix{ 300 my ($self,$matrix) = @_; 301 my $bgd = $self->background; 302 my @pwm; 303 my $consensus = $matrix->consensus; 304 foreach my $i(1..length($consensus)){ 305 my %base = $matrix->next_pos; 306 my $bits; 307 $bits+=($base{pA} * log2($base{pA}/$bgd->{'A'})); 308 $bits+=($base{pC} * log2($base{pC}/$bgd->{'C'})); 309 $bits+=($base{pG} * log2($base{pG}/$bgd->{'G'})); 310 $bits+=($base{pT} * log2($base{pT}/$bgd->{'T'})); 311 push @pwm, [$base{pA},$base{pC},$base{pG},$base{pT},abs(sprintf("%.3f",$bits))]; 312 } 313 return @pwm; 314} 315 316sub _make_pwm { 317 my ($self,$input) = @_; 318 my $count = 1; 319 my %hash; 320 my $bgd = $self->background; 321 #sum up the frequencies at each base pair 322 foreach my $seq(@$input){ 323 my $string = $seq->seq; 324 $string = uc $string; 325 my @motif = split('',$string); 326 my $pos = 1; 327 foreach my $t(@motif){ 328 $hash{$pos}{$t}++; 329 $pos++; 330 } 331 $count++; 332 } 333 334 #calculate relative freq 335 my @pwm; 336 337 #decrement last count 338 $count--; 339 foreach my $pos(sort{$a<=>$b} keys %hash){ 340 my @array; 341 push @array,($hash{$pos}{'A'}||0)/$count; 342 push @array,($hash{$pos}{'C'}||0)/$count; 343 push @array,($hash{$pos}{'G'}||0)/$count; 344 push @array,($hash{$pos}{'T'}||0)/$count; 345 346 #calculate bits 347 # relative entropy (RelEnt) or Kullback-Liebler distance 348 # relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and 349 # gk the background frequency of nucleotide k 350 351 my $bits; 352 $bits+=(($hash{$pos}{'A'}||0) / $count) * log2((($hash{$pos}{'A'}||0)/$count) / ($bgd->{'A'})); 353 $bits+=(($hash{$pos}{'C'}||0) / $count) * log2((($hash{$pos}{'C'}||0)/$count) / ($bgd->{'C'})); 354 $bits+=(($hash{$pos}{'G'}||0) / $count) * log2((($hash{$pos}{'G'}||0)/$count) / ($bgd->{'G'})); 355 $bits+=(($hash{$pos}{'T'}||0) / $count) * log2((($hash{$pos}{'T'}||0)/$count) / ($bgd->{'T'})); 356 push @array, abs(sprintf("%.3f",$bits)); 357 358 push @pwm,\@array; 359 } 360 return $self->pwm(\@pwm); 361} 362 363 364###various get/sets 365 366=head2 fontsize 367 368 Title : fontsize 369 Usage : $picto->fontsize(); 370 Function: get/set for fontsize 371 Returns : int 372 Arguments: int 373 374=cut 375 376sub fontsize { 377 my ($self,$obj) = @_; 378 if($obj){ 379 $self->{'_fontsize'} = $obj; 380 } 381 return $self->{'_fontsize'}; 382} 383 384=head2 color 385 386 Title : color 387 Usage : $picto->color(); 388 Function: get/set for color 389 Returns : a hash reference 390 Arguments: a hash reference 391 392=cut 393 394sub color { 395 my ($self,$obj) = @_; 396 if($obj){ 397 $self->{'_color'} = $obj; 398 } 399 return $self->{'_color'}; 400} 401 402=head2 svg_obj 403 404 Title : svg_obj 405 Usage : $picto->svg_obj(); 406 Function: get/set for svg_obj 407 Returns : L<SVG> 408 Arguments: L<SVG> 409 410=cut 411 412sub svg_obj { 413 my ($self,$obj) = @_; 414 if($obj){ 415 $self->{'_svg_obj'} = $obj; 416 } 417 return $self->{'_svg_obj'}; 418} 419 420=head2 plot_bits 421 422 Title : plot_bits 423 Usage : $picto->plot_bits(); 424 Function: get/set for plot_bits to indicate whether to plot 425 information content at each base position 426 Returns :1/0 427 Arguments: 1/0 428 429=cut 430 431sub plot_bits { 432 my ($self,$obj) = @_; 433 if($obj){ 434 $self->{'_plot_bits'} = $obj; 435 } 436 return $self->{'_plot_bits'}; 437} 438 439=head2 normalize 440 441 Title : normalize 442 Usage : $picto->normalize($newval) 443 Function: get/set to make all columns the same height. 444 default is to scale height with information 445 content. 446 Returns : value of normalize (a scalar) 447 Args : on set, new value (a scalar or undef, optional) 448 449 450=cut 451 452sub normalize{ 453 my $self = shift; 454 455 return $self->{'normalize'} = shift if @_; 456 return $self->{'normalize'}; 457} 458 459=head2 background 460 461 Title : background 462 Usage : $picto->background(); 463 Function: get/set for hash reference of nucleodtide bgd frequencies 464 Returns : hash reference 465 Arguments: hash reference 466 467=cut 468 469sub background { 470 my ($self,$obj) = @_; 471 if($obj){ 472 $self->{'_background'} = $obj; 473 } 474 return $self->{'_background'}; 475} 476 477=head2 pwm 478 479 Title : pwm 480 Usage : $picto->pwm(); 481 Function: get/set for pwm 482 Returns : int 483 Arguments: int 484 485=cut 486 487sub pwm { 488 my ($self,$pwm) = @_; 489 if($pwm){ 490 $self->{'_pwm'} = $pwm; 491 } 492 return $self->{'_pwm'}; 493} 494 495#utility method for returning log 2 496sub log2 { 497 my ($val) = @_; 498 return 0 if $val==0; 499 return log($val)/log(2); 500} 501 502 5031; 504