1# BioPerl module for Bio::Graphics::Pictogram
2#
3# Cared for by Shawn Hoon <shawnh@fugu-sg.org>
4#
5# Copyright Shawn Hoon
6#
7# You may distribute this module under the same terms as perl itself
8
9# POD documentation - main docs before the code
10
11=head1 NAME
12
13Bio::Graphics::Pictogram - generate SVG output of Pictogram display for consensus motifs
14
15=head1 SYNOPSIS
16
17  use Bio::Graphics::Pictogram;
18  use Bio::SeqIO;
19
20  my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta');
21  my @seq;
22  while(my $seq = $sio->next_seq){
23    push @seq, $seq;
24  }
25
26  my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
27                                            -height=>"500",
28                                            -fontsize=>"60",
29                                            -plot_bits=>1,
30                                            -background=>{
31                                                          'A'=>0.25,
32                                                          'C'=>0.18,
33                                                          'T'=>0.32,
34                                                          'G'=>0.25},
35                                            -color=>{'A'=>'red',
36                                                     'G'=>'blue',
37                                                     'C'=>'green',
38                                                     'T'=>'magenta'});
39
40  my $svg = $picto->make_svg(\@seq);
41
42  print $svg->xmlify."\n";
43
44  #Support for Bio::Matrix::PSM::SiteMatrix now included
45
46   use Bio::Matrix::PSM::IO;
47
48   my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
49                                            -height=>"500",
50                                            -fontsize=>"60",
51                                            -plot_bits=>1,
52                                            -background=>{
53                                                          'A'=>0.25,
54                                                          'C'=>0.18,
55                                                          'T'=>0.32,
56                                                          'G'=>0.25},
57                                            -color=>{'A'=>'red',
58                                                     'G'=>'blue',
59                                                     'C'=>'green',
60                                                     'T'=>'magenta'});
61
62  my $psm = $psmIO->next_psm;
63  my $svg = $picto->make_svg($psm);
64  print $svg->xmlify;
65
66
67=head1 DESCRIPTION
68
69A module for generating SVG output of Pictogram display for consensus
70motifs.  This method of representation was describe by Burge and
71colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II,
72525-560, CSHL press, 1999)
73
74This is a simple module that takes in an array of sequences (assuming
75equal lengths) and calculates relative base frequencies where the
76height of each letter reflects the frequency of each nucleotide at a
77given position. It can also plot the information content at each
78position scaled by the background frequencies of each nucleotide.
79
80It requires the SVG-2.26 or later module by Ronan Oger available at
81http://www.cpan.org
82
83Recommended viewing of the SVG is the plugin available at Adobe:
84http://www.adobe.com/svg
85
86=head1 FEEDBACK
87
88
89=head2 Mailing Lists
90
91User feedback is an integral part of the evolution of this and other
92Bioperl modules. Send your comments and suggestions preferably to one
93of the Bioperl mailing lists. Your participation is much appreciated.
94
95  bioperl-l@bioperl.org                  - General discussion
96  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
97
98=head2 Reporting Bugs
99
100Report bugs to the Bioperl bug tracking system to help us keep track
101the bugs and their resolution.  Bug reports can be submitted via the
102web:
103
104  http://bugzilla.open-bio.org/
105
106=head1 AUTHOR - Shawn Hoon
107
108Email shawnh@fugu-sg.org
109
110
111=head1 APPENDIX
112
113
114The rest of the documentation details each of the object
115methods. Internal methods are usually preceded with a "_".
116
117=cut
118
119package Bio::Graphics::Pictogram;
120use strict;
121use SVG 2.26;
122use Bio::SeqIO;
123use base qw(Bio::Root::Root);
124
125use constant MAXBITS => 2;
126
127=head2 new
128
129 Title   : new
130 Usage   : my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
131                                            -height=>"500",
132                                            -fontsize=>"60",
133                                            -plot_bits=>1,
134                                            -background=>{
135                                                          'A'=>0.25,
136                                                          'C'=>0.18,
137                                                          'T'=>0.32,
138                                                          'G'=>0.25},
139                                            -color=>{'A'=>'red',
140                                                      'G'=>'blue',
141                                                      'C'=>'green',
142                                                      'T'=>'magenta'});
143 Function: Constructor for Pictogram Object
144 Returns : L<Bio::Graphics::Pictogram>
145
146=cut
147
148sub new {
149  my ($caller,@args) = @_;
150  my $self = $caller->SUPER::new(@args);
151  my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args);
152  $width||=800;
153  $height||=600;
154  my $svg = SVG->new(width=>$width,height=>$height);
155  $self->svg_obj($svg);
156  $fontsize ||= 80;
157  $self->fontsize($fontsize) if $fontsize;
158  $color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'};
159  $self->color($color);
160  $background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25};
161  $self->background($background);
162  $self->plot_bits($bit) if $bit;
163  $self->normalize($normalize) if $normalize;
164
165  return $self;
166}
167
168=head2 make_svg
169
170 Title   : make_svg
171 Usage   : $picto->make_svg();
172 Function: make the SVG object
173 Returns : L<SVG>
174 Arguments: A fasta file or array ref of L<Bio::Seq> objects or a L<Bio::Matrix::PSM::SiteMatrixI>
175
176=cut
177
178sub make_svg {
179  my ($self,$input) = @_;
180  my $fontsize = $self->fontsize;
181  my $size = $fontsize * 0.75;
182  my $width= $size;
183  my $height= $size+40;
184  my $color = $self->color;
185
186  #starting x coordinate for pictogram
187  my $x = 45+$size/2;
188  my $pos_y = $size * 2;
189  my $bit_y = $pos_y+40;
190  my @pwm;
191
192  my $bp = 1;
193
194  #input can be file or array ref of sequences
195  if(ref($input) eq 'ARRAY'){
196    @pwm = @{$self->_make_pwm($input)};
197  }
198  elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){
199    @pwm = $self->_make_pwm_from_site_matrix($input);
200  }
201  else {
202    my $sio = Bio::SeqIO->new(-file=>$input,-format=>"fasta");
203    my @seq;
204    while (my $seq = $sio->next_seq){
205      push @seq, $seq;
206    }
207    @pwm = @{$self->_make_pwm(\@seq)};
208  }
209
210
211  my $svg = $self->svg_obj;
212  my $seq_length = scalar(@pwm + 1) * $width + $x + $x;
213  my $seq_grp;
214
215  #scale the svg if length greater than svg width
216  if($seq_length > $svg->{-document}->{'width'}){
217    my $ratio = $svg->{-document}->{'width'}/($seq_length);
218    $seq_grp = $svg->group(transform=>"scale($ratio,1)");
219  }
220  else {
221    $seq_grp= $svg->group();
222  }
223
224  #do the drawing, each set is a base position
225  foreach my $set(@pwm){
226    my ($A,$C,$G,$T,$bits) = @$set;
227    my @array;
228    push @array,  ['a',($A)];
229    push @array, ['g',($G)];
230    push @array, ['c',($C)];
231    push @array, ['t',($T)];
232    @array = sort {$b->[1]<=>$a->[1]}@array;
233    my $count = 1;
234    my $pos_group = $seq_grp->group(id=>"bp $bp");
235    my $prev_size;
236    my $y_trans;
237
238    #draw each letter at each position
239    foreach my $letter(@array){
240	  my $scale;
241	  if($self->normalize){
242		$scale = $letter->[1];
243	  } else {
244		$scale = $letter->[1] * ($bits / MAXBITS);
245	  }
246
247      if($count == 1){
248		if($self->normalize){
249		  $y_trans = 0;
250		} else {
251		  $y_trans = (1 - ($bits / MAXBITS)) * $size;
252		}
253      }
254      else {
255        $y_trans += $prev_size;
256      }
257      $pos_group->text('id'=> uc($letter->[0]).$bp,height=>$height,
258                      'width'=>$width,x=>$x,y=>$size,
259                      'transform'=>"translate(0,$y_trans),scale(1,$scale)",
260                      'style'=>{"font-size"=>$fontsize,
261                      'text-anchor'=>'middle',
262                      'font-family'=>'Verdana',
263                      'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0;
264
265     $prev_size = $scale * $size;
266     $count++;
267    }
268    #plot the bit if required
269    if($self->plot_bits){
270         $seq_grp->text('x'=>$x,
271                        'y'=>$bit_y,
272                        'style'=>{"font-size"=>'10',
273                                'text-anchor'=>'middle',
274                                'font-family'=>'Verdana',
275                                'fill'=>'black'})->cdata($bits);
276    }
277    $bp++;
278    $x+=$width;
279  }
280
281  #plot the tags
282  $seq_grp->text(x=>int($width/2),y=>$bit_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits;
283
284 $seq_grp->text(x=>int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:");
285
286  #plot the base positions
287  $x = 45+$size/2-int($width/2);
288  foreach my $nbr(1..($bp-1)){
289    $seq_grp->text(x=>$x+int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr);
290    $x+=$width;
291  }
292
293
294#  $seq_grp->transform("scale(2,2)");
295
296  return $self->svg_obj($svg);
297}
298
299sub _make_pwm_from_site_matrix{
300  my ($self,$matrix) = @_;
301  my $bgd = $self->background;
302  my @pwm;
303  my $consensus = $matrix->consensus;
304  foreach my $i(1..length($consensus)){
305    my %base = $matrix->next_pos;
306    my $bits;
307    $bits+=($base{pA} * log2($base{pA}/$bgd->{'A'}));
308    $bits+=($base{pC} * log2($base{pC}/$bgd->{'C'}));
309    $bits+=($base{pG} * log2($base{pG}/$bgd->{'G'}));
310    $bits+=($base{pT} * log2($base{pT}/$bgd->{'T'}));
311    push @pwm, [$base{pA},$base{pC},$base{pG},$base{pT},abs(sprintf("%.3f",$bits))];
312  }
313  return @pwm;
314}
315
316sub _make_pwm {
317  my ($self,$input) = @_;
318  my $count = 1;
319  my %hash;
320  my $bgd = $self->background;
321  #sum up the frequencies at each base pair
322  foreach my $seq(@$input){
323    my $string = $seq->seq;
324    $string =  uc $string;
325    my @motif = split('',$string);
326    my $pos = 1;
327    foreach my $t(@motif){
328      $hash{$pos}{$t}++;
329      $pos++;
330    }
331    $count++;
332  }
333
334  #calculate relative freq
335  my @pwm;
336
337  #decrement last count
338  $count--;
339  foreach my $pos(sort{$a<=>$b} keys %hash){
340    my @array;
341    push @array,($hash{$pos}{'A'}||0)/$count;
342    push @array,($hash{$pos}{'C'}||0)/$count;
343    push @array,($hash{$pos}{'G'}||0)/$count;
344    push @array,($hash{$pos}{'T'}||0)/$count;
345
346    #calculate bits
347    # relative entropy (RelEnt) or Kullback-Liebler distance
348    # relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and
349    # gk the background frequency of nucleotide k
350
351    my $bits;
352    $bits+=(($hash{$pos}{'A'}||0) / $count) * log2((($hash{$pos}{'A'}||0)/$count) / ($bgd->{'A'}));
353    $bits+=(($hash{$pos}{'C'}||0) / $count) * log2((($hash{$pos}{'C'}||0)/$count) / ($bgd->{'C'}));
354    $bits+=(($hash{$pos}{'G'}||0) / $count) * log2((($hash{$pos}{'G'}||0)/$count) / ($bgd->{'G'}));
355    $bits+=(($hash{$pos}{'T'}||0) / $count) * log2((($hash{$pos}{'T'}||0)/$count) / ($bgd->{'T'}));
356    push @array, abs(sprintf("%.3f",$bits));
357
358    push @pwm,\@array;
359  }
360  return $self->pwm(\@pwm);
361}
362
363
364###various get/sets
365
366=head2 fontsize
367
368 Title   : fontsize
369 Usage   : $picto->fontsize();
370 Function: get/set for fontsize
371 Returns : int
372 Arguments: int
373
374=cut
375
376sub fontsize {
377  my ($self,$obj) = @_;
378  if($obj){
379    $self->{'_fontsize'} = $obj;
380  }
381  return   $self->{'_fontsize'};
382}
383
384=head2 color
385
386 Title   : color
387 Usage   : $picto->color();
388 Function: get/set for color
389 Returns : a hash reference
390 Arguments: a hash  reference
391
392=cut
393
394sub color {
395  my ($self,$obj) = @_;
396  if($obj){
397    $self->{'_color'} = $obj;
398  }
399  return   $self->{'_color'};
400}
401
402=head2 svg_obj
403
404 Title   : svg_obj
405 Usage   : $picto->svg_obj();
406 Function: get/set for svg_obj
407 Returns : L<SVG>
408 Arguments: L<SVG>
409
410=cut
411
412sub svg_obj {
413  my ($self,$obj) = @_;
414  if($obj){
415    $self->{'_svg_obj'} = $obj;
416  }
417  return   $self->{'_svg_obj'};
418}
419
420=head2 plot_bits
421
422 Title   : plot_bits
423 Usage   : $picto->plot_bits();
424 Function: get/set for plot_bits to indicate whether to plot
425           information content at each base position
426 Returns :1/0
427 Arguments: 1/0
428
429=cut
430
431sub plot_bits {
432  my ($self,$obj) = @_;
433  if($obj){
434    $self->{'_plot_bits'} = $obj;
435  }
436  return   $self->{'_plot_bits'};
437}
438
439=head2 normalize
440
441 Title   : normalize
442 Usage   : $picto->normalize($newval)
443 Function: get/set to make all columns the same height.
444           default is to scale height with information
445           content.
446 Returns : value of normalize (a scalar)
447 Args    : on set, new value (a scalar or undef, optional)
448
449
450=cut
451
452sub normalize{
453    my $self = shift;
454
455    return $self->{'normalize'} = shift if @_;
456    return $self->{'normalize'};
457}
458
459=head2 background
460
461 Title   : background
462 Usage   : $picto->background();
463 Function: get/set for hash reference of nucleodtide bgd frequencies
464 Returns : hash reference
465 Arguments: hash reference
466
467=cut
468
469sub background {
470  my ($self,$obj) = @_;
471  if($obj){
472    $self->{'_background'} = $obj;
473  }
474  return   $self->{'_background'};
475}
476
477=head2 pwm
478
479 Title   : pwm
480 Usage   : $picto->pwm();
481 Function: get/set for pwm
482 Returns : int
483 Arguments: int
484
485=cut
486
487sub pwm {
488  my ($self,$pwm) = @_;
489  if($pwm){
490    $self->{'_pwm'} = $pwm;
491  }
492  return $self->{'_pwm'};
493}
494
495#utility method for returning log 2
496sub log2 {
497    my ($val) = @_;
498    return 0 if $val==0;
499    return log($val)/log(2);
500}
501
502
5031;
504