1#
2# BioPerl module for Bio::Tools::Run::Alignment::Gmap
3#
4# Cared for by George Hartzell <hartzell@alerce.com>
5#
6# Copyright George Hartzell
7#
8# You may distribute this module under the same terms as perl itself
9
10# POD documentation - main docs before the code
11
12=head1 NAME
13
14Bio::Tools::Run::Alignment::Gmap - Wrapper for running gmap.
15
16=head1 SYNOPSIS
17
18  use Bio::Tools::Run::Alignment::Gmap;
19  use Bio::SeqIO;
20
21  my $sio = Bio::SeqIO->new(-file=>$filename ,-format=>'fasta');
22  my @seq;
23  while(my $seq = $sio->next_seq()){
24    push @seq,$seq;
25  }
26  my $mapper =Bio::Tools::Run::Gmap->new();
27  my $result = $mapper->run(\@seq);
28
29
30=head1 DESCRIPTION
31
32Bioperl-run wrapper around gmap.  See
33L<http://www.gene.com/share/gmap/> for information about gmap.
34
35It requires a reference to an array of bioperl SeqI objects and
36returns a reference to a filehandle from which the gmap output can be
37read.
38
39One can explicitly set the name of the genome database (defaults to
40NHGD_R36) using the 'genome_db()' method.  One can also explicitly set
41the flags that are passed to gmap (defaults to '-f 9 -5 -e') using the
42'flags()' method.
43
44The name of the gmap executable can be overridden using the
45program_name() method and the directory in which to find that
46executable can be overridden using the program_dir() method.
47
48=head1 FEEDBACK
49
50=head2 Mailing Lists
51
52User feedback is an integral part of the evolution of this and other
53Bioperl modules. Send your comments and suggestions preferably to
54the Bioperl mailing list.  Your participation is much appreciated.
55
56  bioperl-l@bioperl.org                  - General discussion
57  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
58
59=head2 Reporting Bugs
60
61Report bugs to the Bioperl bug tracking system to help us keep track
62of the bugs and their resolution. Bug reports can be submitted via
63the web:
64
65  http://redmine.open-bio.org/projects/bioperl/
66
67=head1 AUTHOR - George Hartzell
68
69Email hartzell@alerce.com
70
71Describe contact details here
72
73=head1 CONTRIBUTORS
74
75Additional contributors names and emails here
76
77=head1 APPENDIX
78
79The rest of the documentation details each of the object methods.
80Internal methods are usually preceded with a _
81
82=cut
83
84
85# Let the code begin...
86
87# TODO handle stderr output from gmap.
88
89package Bio::Tools::Run::Alignment::Gmap;
90use strict;
91use warnings;
92
93# Object preamble - inherits from Bio::Root::Root
94
95use Bio::Root::Root;
96use Bio::SeqIO;
97
98
99use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
100            Bio::Factory::ApplicationFactoryI);
101
102=head2 new
103
104 Title   : new
105 Usage   : my $obj = new Bio::Tools::Run::Alignment::Gmap();
106 Function: Builds a new Bio::Tools::Run::Alignment::Gmap object
107 Returns : an instance of Bio::Tools::Run::Alignment::Gmap
108 Args    :
109
110
111=cut
112
113sub new {
114  my($class,@args) = @_;
115
116  my $self = $class->SUPER::new(@args);
117  $self->{_program_name} = 'gmap';
118  return $self;
119}
120
121=head2 version
122
123 Title   : version
124 Usage   : print "gmap version: " . $mapper->version() . "\n";
125 Function: retrieves and returns the version of the gmap package.
126 Example :
127 Returns : scalar string containing the version number.  Probably looks
128           like YYYY-MM-DD.
129 Args    : none.
130
131
132=cut
133
134sub version {
135   my ($self,@args) = @_;
136   my $version;
137
138   my $str = $self->executable;
139   $str .= ' --version';
140   $self->debug("gmap version command = $str\n");
141
142   open(GMAPRUN, "$str |") || $self->throw($@);
143
144   {
145       local $/ = undef;
146       my $result = <GMAPRUN>;
147       ($version) = ($result =~ m|.*Part of GMAP package, version (.*).*|);
148   }
149
150   return($version);
151
152}
153
154=head2 program_name
155
156 Title   : program_name
157 Usage   : $mapper->program_name('gmap-dev');
158           my $pname = $mapper->program_name();
159 Function: sets/gets the name of the program to run.
160 Returns : string representing the name of the executable.
161 Args    : [optional] string representing the name of the executable
162           to set.
163
164=cut
165
166sub program_name {
167   my $self = shift;
168
169   $self->{_program_name} = shift if @_;
170   return $self->{_program_name};
171}
172
173=head2 program_dir
174
175 Title   : program_dir
176 Usage   : $mapper->program_dir('/usr/local/sandbox/gmap/bin');
177           my $pdir = $mapper->program_dir();
178 Function: sets/gets the directory path in which
179           to find the gmap executable.
180 Returns : string representing the path to the directory.
181 Args    : [optional] string representing the directory path to set.
182
183=cut
184
185sub program_dir {
186  my $self = shift;
187
188  $self->{_program_dir} = shift if @_;
189  return $self->{_program_dir};
190}
191
192=head2 input_file
193
194 Title   : input_file
195 Usage   : $mapper->input_file('/tmp/moose.fasta');
196           my $filename = $mapper->input_file();
197 Function: sets/gets the name of a file containing sequences
198           to be mapped.
199 Returns : string containing the name of the query sequence.
200 Args    : [optional] string representing the directory path to set.
201
202=cut
203
204sub input_file {
205  my $self = shift;
206
207  $self->{_input_file} = shift if @_;
208  return $self->{_input_file};
209}
210
211=head2 genome_db
212
213 Title   : genome_db
214 Usage   : $mapper->genome_db('NHGD_R36');
215           my $genome_db = $mapper->genome_db();
216 Function: sets/gets the name of the genome database, this will be
217           passed to gmap using its '-d' flag.
218 Returns : name of the genome database.
219 Args    : [optional] string representing the genome db to set.
220
221=cut
222
223sub genome_db {
224  my $self = shift;
225
226  $self->{_genome_db} = shift if @_;
227  return $self->{_genome_db};
228}
229
230=head2 flags
231
232 Title   : flags
233 Usage   : $mapper->flags('-A -e -5');
234           my $flags = $mapper->flags();
235 Function: sets/gets the flags that will be passed to gmap.
236 Returns : the current value of the flags that will be passed to gmap.
237 Args    : [optional] the flags to set.
238
239=cut
240
241sub flags {
242  my $self = shift;
243
244  $self->{_flags} = shift if @_;
245  return $self->{_flags};
246}
247
248=head2 run
249
250 Title   : run
251 Usage   : $mapper->run()
252 Function: runs gmap
253 Example :
254 Returns : a file handle, opened for reading, for gmap's output.
255 Args    : An array of references query sequences (as Bio::Seq objects)
256
257
258=cut
259
260sub run {
261  my $self = shift;
262
263  $self->input_file( $self->_build_fasta_input_file(@_) ) if(@_);
264
265  my $result = $self->_run();
266
267  return $result;
268}
269
270=head2 _build_fasta_input_file
271
272 Title   : _build_fasta_input_file
273 Usage   : my $seq_file = $self->_build_fasta_input_file(@_);
274 Function:
275 Example :
276 Returns : The name of the temporary file that contains the sequence.
277 Args    : A reference to an array of Bio::Seq objects.
278
279=cut
280
281use File::Temp;
282
283sub _build_fasta_input_file {
284  my $self = shift;
285  my $seqs = shift;
286  my $seq_count = 0;
287
288  # the object returned by File::Temp->new() is magic.  Used normally
289  # it behaves as a filehandle opened onto the temporary file.  Used
290  # as a string it behaves as a string that is the name of the
291  # temporary file.
292  # It is up to the user to remove the when finished with it.
293  my $file_tmp = File::Temp->new( TEMPLATE => 'mvp-gmap-tempfile-XXXXXX',
294				  TMPDIR => 1,
295				  UNLINK => 0,
296				);
297  my $seqio = Bio::SeqIO->new( -fh => $file_tmp, -format => 'Fasta' );
298
299  if (ref($seqs) =~ /ARRAY/i) {
300    foreach my $seq (@$seqs) {
301      throw
302	Bio::Root::BadParameter(-text =>
303				"sequence args must be a Bio::SeqI subclass.",
304			       )
305	    unless ($seq->isa("Bio::PrimarySeqI"));
306
307      $seqio->write_seq($seq);
308      $seq_count++;
309    }
310  }
311  if ($seq_count == 0) {
312    throw
313      Bio::Root::BadParameter(-text => <<EOM
314You must supply a reference to an array of Bio::SeqI objects.
315EOM
316			     );
317  }
318
319  # pass back the filename of the temporary file (see above)
320  return ("$file_tmp");
321}
322
323
324sub _run {
325  my $self = shift;
326
327  my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
328  my $str = $self->executable;
329  $str .= ' -d' . ($self->genome_db() || 'NHGD_R36');
330  $str .= ' ' . ($self->flags() || '-f 9 -5 -e');
331  $str .= ' ' . $self->input_file();
332  $str .= " 2> $null";
333  $str .= '; rm -f ' . $self->input_file();
334  $self->debug("gmap command = $str\n");
335
336  open(GMAPRUN, "$str |") || $self->throw("Can't open gmap (command = \"$str\"): $!");
337
338  return (\*GMAPRUN);
339}
340
3411;
342