1# 2# BioPerl module for Bio::Tools::Run::Alignment::Gmap 3# 4# Cared for by George Hartzell <hartzell@alerce.com> 5# 6# Copyright George Hartzell 7# 8# You may distribute this module under the same terms as perl itself 9 10# POD documentation - main docs before the code 11 12=head1 NAME 13 14Bio::Tools::Run::Alignment::Gmap - Wrapper for running gmap. 15 16=head1 SYNOPSIS 17 18 use Bio::Tools::Run::Alignment::Gmap; 19 use Bio::SeqIO; 20 21 my $sio = Bio::SeqIO->new(-file=>$filename ,-format=>'fasta'); 22 my @seq; 23 while(my $seq = $sio->next_seq()){ 24 push @seq,$seq; 25 } 26 my $mapper =Bio::Tools::Run::Gmap->new(); 27 my $result = $mapper->run(\@seq); 28 29 30=head1 DESCRIPTION 31 32Bioperl-run wrapper around gmap. See 33L<http://www.gene.com/share/gmap/> for information about gmap. 34 35It requires a reference to an array of bioperl SeqI objects and 36returns a reference to a filehandle from which the gmap output can be 37read. 38 39One can explicitly set the name of the genome database (defaults to 40NHGD_R36) using the 'genome_db()' method. One can also explicitly set 41the flags that are passed to gmap (defaults to '-f 9 -5 -e') using the 42'flags()' method. 43 44The name of the gmap executable can be overridden using the 45program_name() method and the directory in which to find that 46executable can be overridden using the program_dir() method. 47 48=head1 FEEDBACK 49 50=head2 Mailing Lists 51 52User feedback is an integral part of the evolution of this and other 53Bioperl modules. Send your comments and suggestions preferably to 54the Bioperl mailing list. Your participation is much appreciated. 55 56 bioperl-l@bioperl.org - General discussion 57 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 58 59=head2 Reporting Bugs 60 61Report bugs to the Bioperl bug tracking system to help us keep track 62of the bugs and their resolution. Bug reports can be submitted via 63the web: 64 65 http://redmine.open-bio.org/projects/bioperl/ 66 67=head1 AUTHOR - George Hartzell 68 69Email hartzell@alerce.com 70 71Describe contact details here 72 73=head1 CONTRIBUTORS 74 75Additional contributors names and emails here 76 77=head1 APPENDIX 78 79The rest of the documentation details each of the object methods. 80Internal methods are usually preceded with a _ 81 82=cut 83 84 85# Let the code begin... 86 87# TODO handle stderr output from gmap. 88 89package Bio::Tools::Run::Alignment::Gmap; 90use strict; 91use warnings; 92 93# Object preamble - inherits from Bio::Root::Root 94 95use Bio::Root::Root; 96use Bio::SeqIO; 97 98 99use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase 100 Bio::Factory::ApplicationFactoryI); 101 102=head2 new 103 104 Title : new 105 Usage : my $obj = new Bio::Tools::Run::Alignment::Gmap(); 106 Function: Builds a new Bio::Tools::Run::Alignment::Gmap object 107 Returns : an instance of Bio::Tools::Run::Alignment::Gmap 108 Args : 109 110 111=cut 112 113sub new { 114 my($class,@args) = @_; 115 116 my $self = $class->SUPER::new(@args); 117 $self->{_program_name} = 'gmap'; 118 return $self; 119} 120 121=head2 version 122 123 Title : version 124 Usage : print "gmap version: " . $mapper->version() . "\n"; 125 Function: retrieves and returns the version of the gmap package. 126 Example : 127 Returns : scalar string containing the version number. Probably looks 128 like YYYY-MM-DD. 129 Args : none. 130 131 132=cut 133 134sub version { 135 my ($self,@args) = @_; 136 my $version; 137 138 my $str = $self->executable; 139 $str .= ' --version'; 140 $self->debug("gmap version command = $str\n"); 141 142 open(GMAPRUN, "$str |") || $self->throw($@); 143 144 { 145 local $/ = undef; 146 my $result = <GMAPRUN>; 147 ($version) = ($result =~ m|.*Part of GMAP package, version (.*).*|); 148 } 149 150 return($version); 151 152} 153 154=head2 program_name 155 156 Title : program_name 157 Usage : $mapper->program_name('gmap-dev'); 158 my $pname = $mapper->program_name(); 159 Function: sets/gets the name of the program to run. 160 Returns : string representing the name of the executable. 161 Args : [optional] string representing the name of the executable 162 to set. 163 164=cut 165 166sub program_name { 167 my $self = shift; 168 169 $self->{_program_name} = shift if @_; 170 return $self->{_program_name}; 171} 172 173=head2 program_dir 174 175 Title : program_dir 176 Usage : $mapper->program_dir('/usr/local/sandbox/gmap/bin'); 177 my $pdir = $mapper->program_dir(); 178 Function: sets/gets the directory path in which 179 to find the gmap executable. 180 Returns : string representing the path to the directory. 181 Args : [optional] string representing the directory path to set. 182 183=cut 184 185sub program_dir { 186 my $self = shift; 187 188 $self->{_program_dir} = shift if @_; 189 return $self->{_program_dir}; 190} 191 192=head2 input_file 193 194 Title : input_file 195 Usage : $mapper->input_file('/tmp/moose.fasta'); 196 my $filename = $mapper->input_file(); 197 Function: sets/gets the name of a file containing sequences 198 to be mapped. 199 Returns : string containing the name of the query sequence. 200 Args : [optional] string representing the directory path to set. 201 202=cut 203 204sub input_file { 205 my $self = shift; 206 207 $self->{_input_file} = shift if @_; 208 return $self->{_input_file}; 209} 210 211=head2 genome_db 212 213 Title : genome_db 214 Usage : $mapper->genome_db('NHGD_R36'); 215 my $genome_db = $mapper->genome_db(); 216 Function: sets/gets the name of the genome database, this will be 217 passed to gmap using its '-d' flag. 218 Returns : name of the genome database. 219 Args : [optional] string representing the genome db to set. 220 221=cut 222 223sub genome_db { 224 my $self = shift; 225 226 $self->{_genome_db} = shift if @_; 227 return $self->{_genome_db}; 228} 229 230=head2 flags 231 232 Title : flags 233 Usage : $mapper->flags('-A -e -5'); 234 my $flags = $mapper->flags(); 235 Function: sets/gets the flags that will be passed to gmap. 236 Returns : the current value of the flags that will be passed to gmap. 237 Args : [optional] the flags to set. 238 239=cut 240 241sub flags { 242 my $self = shift; 243 244 $self->{_flags} = shift if @_; 245 return $self->{_flags}; 246} 247 248=head2 run 249 250 Title : run 251 Usage : $mapper->run() 252 Function: runs gmap 253 Example : 254 Returns : a file handle, opened for reading, for gmap's output. 255 Args : An array of references query sequences (as Bio::Seq objects) 256 257 258=cut 259 260sub run { 261 my $self = shift; 262 263 $self->input_file( $self->_build_fasta_input_file(@_) ) if(@_); 264 265 my $result = $self->_run(); 266 267 return $result; 268} 269 270=head2 _build_fasta_input_file 271 272 Title : _build_fasta_input_file 273 Usage : my $seq_file = $self->_build_fasta_input_file(@_); 274 Function: 275 Example : 276 Returns : The name of the temporary file that contains the sequence. 277 Args : A reference to an array of Bio::Seq objects. 278 279=cut 280 281use File::Temp; 282 283sub _build_fasta_input_file { 284 my $self = shift; 285 my $seqs = shift; 286 my $seq_count = 0; 287 288 # the object returned by File::Temp->new() is magic. Used normally 289 # it behaves as a filehandle opened onto the temporary file. Used 290 # as a string it behaves as a string that is the name of the 291 # temporary file. 292 # It is up to the user to remove the when finished with it. 293 my $file_tmp = File::Temp->new( TEMPLATE => 'mvp-gmap-tempfile-XXXXXX', 294 TMPDIR => 1, 295 UNLINK => 0, 296 ); 297 my $seqio = Bio::SeqIO->new( -fh => $file_tmp, -format => 'Fasta' ); 298 299 if (ref($seqs) =~ /ARRAY/i) { 300 foreach my $seq (@$seqs) { 301 throw 302 Bio::Root::BadParameter(-text => 303 "sequence args must be a Bio::SeqI subclass.", 304 ) 305 unless ($seq->isa("Bio::PrimarySeqI")); 306 307 $seqio->write_seq($seq); 308 $seq_count++; 309 } 310 } 311 if ($seq_count == 0) { 312 throw 313 Bio::Root::BadParameter(-text => <<EOM 314You must supply a reference to an array of Bio::SeqI objects. 315EOM 316 ); 317 } 318 319 # pass back the filename of the temporary file (see above) 320 return ("$file_tmp"); 321} 322 323 324sub _run { 325 my $self = shift; 326 327 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null'; 328 my $str = $self->executable; 329 $str .= ' -d' . ($self->genome_db() || 'NHGD_R36'); 330 $str .= ' ' . ($self->flags() || '-f 9 -5 -e'); 331 $str .= ' ' . $self->input_file(); 332 $str .= " 2> $null"; 333 $str .= '; rm -f ' . $self->input_file(); 334 $self->debug("gmap command = $str\n"); 335 336 open(GMAPRUN, "$str |") || $self->throw("Can't open gmap (command = \"$str\"): $!"); 337 338 return (\*GMAPRUN); 339} 340 3411; 342