1# 2# BioPerl module for Bio::Tools::QRNA 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Jason Stajich <jason-at-bioperl-dot-org> 7# 8# Copyright Jason Stajich 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Tools::QRNA - A Parser for qrna output 17 18=head1 SYNOPSIS 19 20 use Bio::Tools::QRNA; 21 my $parser = Bio::Tools::QRNA->new(-file => $qrnaoutput); 22 while( my $feature = $parser->next_feature ) { 23 # do something here 24 } 25 26=head1 DESCRIPTION 27 28Parses QRNA output (E.Rivas: 29http://selab.janelia.org/software.html 30ftp://selab.janelia.org/pub/software/qrna/). 31 32This module is not complete, but currently it packs information from 33each QRNA alignment into a single Bio::SeqFeature::Generic object. 34 35Not all options for QRNA output have been tested or tried. It has 36been tested on sliding window output (-w -x) and shuffled output (-b 37or -B). 38 39See t/QRNA.t for example usage. 40 41At some point we may have more complicated feature object which will 42support this data rather than forcing most of the information into 43tag/value pairs in a SeqFeature::Generic. 44 45Running with -verbose =E<gt> 1 will store extra data in the feature. The 46entire unparsed entry for a particular feature will be stored as a 47string in the tag 'entry' it is accessible via: 48 49 my ($entry) = $f->each_tag_value('entry'); 50 51The winning model for any given alignment test will be the name stored 52in the primary_tag field of feature. The bit score will stored in the 53score field. The logoddpost is available via the a tag/value pair. 54This example code will show how to print out the score and log odds 55post for each model. 56 57 # assuming you got a feature already 58 print "model score logoddspost\n"; 59 foreach my $model ( qw(OTH COD RNA) ) { 60 my ($score) = $f->get_tag_values("$model\_score"); 61 my ($logoddspost) = $f->get_tag_values("$model\_logoddspost"); 62 print "$model $score $logoddspost\n"; 63 } 64 65The start and end of the alignment for both the query and hit sequence 66are available through the L<Bio::SeqFeature::FeaturePair> interface, 67specifically L<Bio::SeqFeature::FeaturePair::feature1> and 68L<Bio::SeqFeature::FeaturePair::feature2>. Additionally if you have 69run QRNA with an input file which has the location of the alignment 70stored in the FASTA filename as in (ID/START-END) which is the default 71output format from L<Bio::AlignIO::fasta> produced alignment output, 72this module will re-number start/end for the two sequences so they are 73in the actual coordinates of the sequence rather than the relative 74coordinates of the alignment. You may find the bioperl utillity 75script search2alnblocks useful in creating your input files for QRNA. 76 77Some other words of warning, QRNA uses a 0 based numbering system for 78sequence locations, Bioperl uses a 1 based system. You'll notice that 79locations will be +1 they are reported in the raw QRNA output. 80 81=head1 FEEDBACK 82 83=head2 Mailing Lists 84 85User feedback is an integral part of the evolution of this and other 86Bioperl modules. Send your comments and suggestions preferably to 87the Bioperl mailing list. Your participation is much appreciated. 88 89 bioperl-l@bioperl.org - General discussion 90 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 91 92=head2 Support 93 94Please direct usage questions or support issues to the mailing list: 95 96I<bioperl-l@bioperl.org> 97 98rather than to the module maintainer directly. Many experienced and 99reponsive experts will be able look at the problem and quickly 100address it. Please include a thorough description of the problem 101with code and data examples if at all possible. 102 103=head2 Reporting Bugs 104 105Report bugs to the Bioperl bug tracking system to help us keep track 106of the bugs and their resolution. Bug reports can be submitted via 107the web: 108 109 https://github.com/bioperl/bioperl-live/issues 110 111=head1 AUTHOR - Jason Stajich 112 113Email jason-at-bioperl-dot-org 114 115=head1 APPENDIX 116 117The rest of the documentation details each of the object methods. 118Internal methods are usually preceded with a _ 119 120=cut 121 122 123# Let the code begin... 124 125 126package Bio::Tools::QRNA; 127$Bio::Tools::QRNA::VERSION = '1.7.7'; 128use vars qw(@Models); 129use strict; 130 131use Bio::SeqFeature::Generic; 132use Bio::SeqFeature::FeaturePair; 133 134use base qw(Bio::Root::IO Bio::SeqAnalysisParserI); 135@Models = qw(OTH COD RNA); 136 137=head2 new 138 139 Title : new 140 Usage : my $obj = Bio::Tools::QRNA->new(); 141 Function: Builds a new Bio::Tools::QRNA object 142 Returns : an instance of Bio::Tools::QRNA 143 Args : -fh/-file filehandle/filename standard input for 144 Bio::Root:IO objects 145 146=cut 147 148=head2 next_feature 149 150 Title : next_feature 151 Usage : my $feature = $parser->next_feature 152 Function: Get the next QRNA feature 153 Returns : 154 Args : 155 156 157=cut 158 159sub next_feature { 160 my ($self) = @_; 161 my $f = shift @{$self->{'_parsed_features'} || []}; 162 if( ! defined $f && $self->_parse_pair ) { 163 $f = shift @{$self->{'_parsed_features'} || []}; 164 } 165 return $f; 166} 167 168sub _parse_pair { 169 my ($self,@args) = @_; 170 my (@features,%data); 171 my $seenstart = 0; 172 while( defined( $_ = $self->_readline) ) { 173 next if( /^\#\-\-/o ); 174 if( /^\#\s+(qrna)\s+(\S+)\s+\(([^\)]+)\)/o ) { 175 $self->program_name($1); 176 $self->program_version($2); 177 $self->program_date($3); 178 } elsif( /^\#\s+(PAM model)\s+\=\s+(.+)\s+$/o ) { 179 $self->PAM_model($2); 180 } elsif( /^\#\s+(RNA model)\s+\=\s+(\S+)/o ) { 181 $self->RNA_model($2); 182 } elsif( /^\#\s+(seq file)\s+\=\s+(.+)\s+$/o ) { 183 $self->seq_file($2); 184 } elsif( /^\#\s+(\d+)\s+\[([\-+])\s+strand\]/o ) { 185 if( $seenstart ) { 186 if( $data{'alignment_len'} ) { 187 push @features, $self->_make_feature(\%data); 188 } 189 $self->_pushback($_); 190 last; 191 } 192 $seenstart = 1; 193 } elsif( /^\#/ ) { 194 next; 195 } elsif( />(\S+)\s+\((\d+)\)/ ) { 196 if( @{$data{'seqs'} || []} == 2 ) { 197 $self->warn( "already seen seqs ".join(' ', ,map { $_->[0] } 198 @{$data{'seqs'}}). "\n"); 199 } else { 200 push @{$data{'seqs'}}, [$1,$2]; 201 } 202 } elsif( /^length alignment:\s+(\d+)\s+\(id\=(\d+(\.\d+)?)\)/o ) { 203 204 if( $data{'alignment_len'} ) { 205 push @features, $self->_make_feature(\%data); 206 # reset all the data but the 'seqs' field 207 %data = ( 'seqs' => $data{'seqs'} ); 208 } 209 210 if( /\(((sre_)?shuffled)\)/ ) { 211 $data{'shuffled'} = $1; 212 } 213 $data{'alignment_len'} = $1; 214 $data{'alignment_pid'} = $2; 215 } elsif ( /^pos([XY]):\s+(\d+)\-(\d+)\s+\[(\d+)\-(\d+)\]\((\d+)\)\s+ 216 \-\-\s+\((\S+\s+\S+\s+\S+\s+\S+)\)/ox ) { 217 $data{"seq\_$1"}->{'aln'} = [ $2,$3, $4,$5, $6]; 218 @{$data{"seq\_$1"}->{'base_comp'}} = split(/\s+/,$7); 219 } elsif( /^winner\s+\=\s+(\S{3})/ ) { 220 $data{'winning_model'} = $1; 221 } elsif( /^(\S{3})\s+ends\s+\=\s+(\-?\d+)\s+(\-?\d+)/ ) { 222 # QRNA is 0-based 223 # Bioperl is 1 based 224 $data{'model_location'}->{$1} = [ $2,$3 ]; 225 } elsif( /^\s+(logoddspost)?OTH\s+\=\s+/ox ) { 226 while( /(\S+)\s+\=\s+(\-?\d+(\.\d+))/g ) { 227 my ($model,$score)= ($1,$2); 228 if( $model =~ s/^logoddspost// ) { 229 $data{'model_scores'}->{'logoddspost'}->{$model} = $score; 230 } else { 231 $data{'model_scores'}->{'bits'}->{$model} = $score; 232 } 233 } 234 } 235 $data{'entry'} .= $_; 236 } 237 if( @features ) { 238 push @{$self->{'_parsed_features'}}, @features; 239 return scalar @features; 240 } 241 return 0; 242} 243 244=head2 PAM_model 245 246 Title : PAM_model 247 Usage : $obj->PAM_model($newval) 248 Function: 249 Example : 250 Returns : value of PAM_model (a scalar) 251 Args : on set, new value (a scalar or undef, optional) 252 253 254=cut 255 256sub PAM_model{ 257 my $self = shift; 258 return $self->{'PAM_model'} = shift if @_; 259 return $self->{'PAM_model'}; 260} 261 262=head2 RNA_model 263 264 Title : RNA_model 265 Usage : $obj->RNA_model($newval) 266 Function: 267 Example : 268 Returns : value of RNA_model (a scalar) 269 Args : on set, new value (a scalar or undef, optional) 270 271 272=cut 273 274sub RNA_model{ 275 my $self = shift; 276 277 return $self->{'RNA_model'} = shift if @_; 278 return $self->{'RNA_model'}; 279} 280 281=head2 seq_file 282 283 Title : seq_file 284 Usage : $obj->seq_file($newval) 285 Function: 286 Example : 287 Returns : value of seq_file (a scalar) 288 Args : on set, new value (a scalar or undef, optional) 289 290 291=cut 292 293sub seq_file{ 294 my $self = shift; 295 296 return $self->{'seq_file'} = shift if @_; 297 return $self->{'seq_file'}; 298} 299 300 301=head2 program_name 302 303 Title : program_name 304 Usage : $obj->program_name($newval) 305 Function: 306 Example : 307 Returns : value of program_name (a scalar) 308 Args : on set, new value (a scalar or undef, optional) 309 310 311=cut 312 313sub program_name{ 314 my $self = shift; 315 316 return $self->{'program_name'} = shift if @_; 317 return $self->{'program_name'} || 'qrna'; 318} 319 320=head2 program_version 321 322 Title : program_version 323 Usage : $obj->program_version($newval) 324 Function: 325 Example : 326 Returns : value of program_version (a scalar) 327 Args : on set, new value (a scalar or undef, optional) 328 329 330=cut 331 332sub program_version{ 333 my $self = shift; 334 335 return $self->{'program_version'} = shift if @_; 336 return $self->{'program_version'}; 337} 338 339=head2 program_date 340 341 Title : program_date 342 Usage : $obj->program_date($newval) 343 Function: 344 Example : 345 Returns : value of program_date (a scalar) 346 Args : on set, new value (a scalar or undef, optional) 347 348 349=cut 350 351sub program_date{ 352 my $self = shift; 353 return $self->{'program_date'} = shift if @_; 354 return $self->{'program_date'}; 355} 356 357sub _make_feature { 358 my ($self,$data) = @_; 359 my ($qoffset,$hoffset) = (1,1); 360 # when you run qrna and have produced ID/START-END 361 # formatted input strings we can remap the location 362 # to the original 363 364 # name is stored as the first entry in the seq array ref 365 my ($qid,$hid) = ( $data->{'seqs'}->[0]->[0], 366 $data->{'seqs'}->[1]->[0]); 367 if( $qid =~ /(\S+)\/(\d+)\-(\d+)/ ) { 368 ($qid,$qoffset) = ($1,$2); 369 } 370 if( $hid =~ /(\S+)\/(\d+)\-(\d+)/ ) { 371 ($hid,$hoffset) = ($1,$2); 372 } 373 374 my $f = Bio::SeqFeature::FeaturePair->new(); 375 376 my ($s,$e) = @{$data->{'model_location'}->{$data->{'winning_model'}}}; 377 my $qf = Bio::SeqFeature::Generic->new 378 ( -primary_tag => $data->{'winning_model'}, 379 -source_tag => $self->program_name, 380 -score => $data->{'model_scores'}->{'bits'}->{$data->{'winning_model'}}, 381 -start => $s+$qoffset, 382 -end => $e+$qoffset, 383 -seq_id => $qid, 384 -strand => ($s < $e ) ? 1 : -1, 385 ); 386 387 my $hf = Bio::SeqFeature::Generic->new 388 ( -primary_tag => $qf->primary_tag, 389 -source_tag => $qf->source_tag, 390 -score => $qf->score, 391 -seq_id => $hid, 392 -start => $s + $hoffset, 393 -end => $e + $hoffset, 394 -strand => $qf->strand, 395 ); 396 $f->feature1($qf); 397 $f->feature2($hf); 398 $f->add_tag_value('alignment_len', $data->{'alignment_len'}); 399 $f->add_tag_value('alignment_pid', $data->{'alignment_pid'}); 400 # store the other model scores and data 401 foreach my $model ( @Models ) { 402 $f->add_tag_value("$model\_score", $data->{'model_scores'}->{'bits'}->{$model}); 403 $f->add_tag_value("$model\_logoddspost", $data->{'model_scores'}->{'logoddspost'}->{$model}); 404 if( ! $data->{'model_location'}->{$model} ) { 405 if( $self->verbose > 0 ) { 406 $self->debug( $data->{'entry'} ); 407 } 408 $self->throw("no location parsed for $model in ", 409 (map { @$_ } @{$data->{'seqs'}}), " ", $f->start, " ", $f->end); 410 } else { 411 $f->add_tag_value("$model\_positions", 412 join("..",@{$data->{'model_location'}->{$model} })); 413 } 414 } 415 # probably a better way to store this - as 416 # a seq object perhaps 417 $f->add_tag_value('seq1', @{$data->{'seqs'}->[0]}); 418 $f->add_tag_value('seq2', @{$data->{'seqs'}->[1]}); 419 $f->add_tag_value('entry', $data->{'entry'}) if $self->verbose > 0; 420 if( $data->{'shuffled'} ) { 421 $f->add_tag_value('shuffled', $data->{'shuffled'}); 422 } 423 return $f; 424} 4251; 426