1#
2# BioPerl module for Bio::Tools::Run::Phylo::LVB
3#
4# Created by Daniel Barker, based on ProtPars.pm by Shawn Hoon
5#
6# You may distribute this module under the same terms as perl itself
7#
8# POD documentation - main docs before the code
9
10=head1 NAME
11
12Bio::Tools::Run::Phylo::LVB - Object for using the LVB program to create
13an array of L<Bio::Tree> objects from a nucleotide multiple alignment
14file or a nucleotide SimpleAlign object. Works with LVB version 2.1.
15
16=head1 SYNOPSIS
17
18  use Bio::Tools::Run::Phylo::LVB;
19
20  # Create a SimpleAlign object.
21  # NOTE. Aligning nucleotide sequence directly, as below, makes
22  # sense for non-coding nucleotide sequence (e.g., structural RNA
23  # genes, introns, ITS). For protein-coding genes, to prevent
24  # Clustal intronducing frameshifts one should instead align the
25  # translations of the genes, then convert the multiple alignment
26  # to nucleotide by referring to the corresponding transcript
27  # sequences (e.g., using EMBOSS tranalign).
28  use Bio::Tools::Run::Alignment::Clustalw;
29  $aln_factory = Bio::Tools::Run::Alignment::Clustalw->new(quiet => 1);
30  $inputfilename = "/Users/daniel/nuc.fa";
31  $aln = $aln_factory->align($inputfilename);
32
33  # Create the tree or trees.
34  $tree_factory = Bio::Tools::Run::Phylo::LVB->new(quiet => 1);
35  @trees = $tree_factory->run($aln);
36
37  # Or one can pass in a file name containing a nucleotide multiple
38  # alignment in Phylip 3.6 format:
39  $tree_factory = Bio::Tools::Run::Phylo::LVB->new(quiet => 1);
40  $tree = $tree_factory->run("/Users/daniel/nuc.phy");
41
42=head1 DESCRIPTION
43
44Wrapper for LVB, which uses a simulated annealing heuristic search
45to seek parsimonious trees from a nucleotide multiple alignment.
46
47=head1 FEEDBACK
48
49=head2 Mailing Lists
50
51User feedback is an integral part of the evolution of this and other
52Bioperl modules. Send your comments and suggestions preferably to one
53of the Bioperl mailing lists.  Your participation is much appreciated.
54
55  bioperl-l@bioperl.org                  - General discussion
56  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
57
58=head2 Support
59
60Please direct usage questions or support issues to the mailing list:
61
62I<bioperl-l@bioperl.org>
63
64rather than to the module maintainer directly. Many experienced and
65reponsive experts will be able look at the problem and quickly
66address it. Please include a thorough description of the problem
67with code and data examples if at all possible.
68
69=head2 Reporting Bugs
70
71Report bugs to the Bioperl bug tracking system to help us keep track
72the bugs and their resolution.  Bug reports can be submitted via the
73web:
74
75  http://redmine.open-bio.org/projects/bioperl/
76
77=head1 PARAMETERS FOR LVB COMPUTATION
78
79=head2 FORMAT
80
81  Title       : FORMAT
82  Description : (optional)
83                When running LVB from a Phylip 3.6-format
84                multiple alignment file, this specifies
85                the layout of the file. It may be
86                "interleaved" or "sequential". FORMAT is
87                automatically set to "interleaved" if
88                running from a SimpleAlign object.
89                Defaults to "interleaved".
90
91=head2 GAPS
92
93  Title       : GAPS
94  Description : (optional)
95                LVB can treat gaps represented in the
96                multiple alignment by "-" as either
97                "fifthstate" or "unknown". "fifthstate"
98                regards "-" as equivalent to "O", which
99                is an unambiguous character state
100                distinct from all nucleotides. "unknown"
101                regards "-" as equivalent to "?", which
102                is as an ambiguous site that may contain
103                "A" or "C" or "G" or "T" or "O".
104                Defaults to "unknown".
105
106=head2 SEED
107
108  Title       : SEED
109  Description : (optional)
110                This specifies the random number seed
111                for LVB. SEED must be an integer in the
112                range 0 to 900000000 inclusive. If no
113                seed is specified, LVB takes a seed from
114                the system clock. By default, no seed is
115                specified.
116
117=head2 DURATION
118
119  Title       : DURATION
120  Description : (optional)
121                This specifies the duration of the
122                analysis, which may be "fast" or "slow".
123                "slow" causes LVB to perform a more
124                thorough and more time-consuming search
125                than "fast". Defaults to "slow".
126
127=head2 BOOTSTRAPS
128
129  Title       : BOOTSTRAPS
130  Description : (optional)
131                This specifies the number of bootstrap
132                replicates to use, which must be a
133                positive integer. Set bootstraps to 0 for
134                no bootstrapping. Defaults to 0.
135
136=head1 AUTHOR
137
138Daniel Barker
139
140=head1 CONTRIBUTORS
141
142Email jason-AT-bioperl_DOT_org
143
144=head1 APPENDIX
145
146The rest of the documentation details each of the object
147methods. Internal methods are usually preceded with a _
148
149=cut
150
151#'
152
153package Bio::Tools::Run::Phylo::LVB;
154
155use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
156	    @LVB_PARAMS @OTHER_SWITCHES
157	    %OK_FIELD);
158use strict;
159use Bio::SimpleAlign;
160use Cwd;
161use Bio::AlignIO;
162use Bio::TreeIO;
163use Bio::Root::Root;
164use Bio::Tools::Run::WrapperBase;
165use Bio::Root::IO;
166use File::Copy;
167
168@ISA = qw(Bio::Root::Root  Bio::Tools::Run::WrapperBase);
169
170# You will need to enable the LVB program.
171# You can set the path to the program through doing:
172# my @params('executable'=>'/usr/local/bin/lvb');
173# my $lvb_factory = Bio::Tools::Run::Phylo::LVB->new(@params);
174#
175
176BEGIN {
177    # NOTE. The order of the members of @LVB_PARAMS is vital!
178    @LVB_PARAMS = qw(FORMAT GAPS SEED DURATION BOOTSTRAPS);
179    @OTHER_SWITCHES = qw(QUIET);
180    foreach my $attr(@LVB_PARAMS, @OTHER_SWITCHES) {
181        $OK_FIELD{$attr}++;
182    }
183}
184
185=head2 program_name
186
187 Title   : program_name
188 Usage   : ->program_name()
189 Function: holds the program name
190 Returns:  string
191 Args    : None
192
193=cut
194
195sub program_name {
196            return 'lvb';
197}
198
199=head2 program_dir
200
201 Title   : program_dir
202 Usage   : ->program_dir()
203 Function: returns undef
204 Args    :
205
206=cut
207
208sub program_dir {
209    return undef;
210}
211
212sub new {
213    my ($class,@args) = @_;
214    my $self = $class->SUPER::new(@args);
215
216    # set defaults
217    $self->FORMAT("interleaved");
218    $self->GAPS("unknown");
219    $self->SEED("");
220    $self->DURATION("slow");
221    $self->BOOTSTRAPS(0);
222
223    # re-set with user's values where specified
224    my ($attr, $value);
225    while (@args)  {
226	$attr =   shift @args;
227	$value =  shift @args;
228	next if( $attr =~ /^-/ ); # don't want named parameters
229        $self->$attr($value);
230    }
231    return $self;
232}
233
234sub AUTOLOAD {
235    my $self = shift;
236    my $attr = $AUTOLOAD;
237    $attr =~ s/.*:://;
238    $attr = uc $attr;
239    $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
240    $self->{$attr} = shift if @_;
241    return $self->{$attr};
242}
243
244=head2  run
245
246 Title   : run
247 Usage   :
248	$inputfilename = '/Users/daniel/nuc.phy';
249	@trees = $factory->run($inputfilename);
250 Function: Create one or more LVB trees from a SimpleAlign
251           object or a file containing a Phylip 3.6-format
252           nucleotide multiple alignment.
253 Example :
254 Returns : Array of L<Bio::Tree> objects
255 Args    : Name of a file containing a nucleotide multiple
256           alignment in Phylip 3.6 format, or a SimpleAlign
257           object
258
259=cut
260
261sub run{
262
263    my ($self,$input) = @_;
264    my ($infilename);
265
266    # Create input file pointer
267    $infilename = $self->_setinput($input);
268    if (!$infilename) {$self->throw("Problems setting up for lvb. Probably bad input data in $input !");}
269
270    # Create parameter string to pass to lvb program
271    my $param_string = $self->_setparams();
272
273    # run lvb
274    my @trees = $self->_run($infilename,$param_string);
275}
276
277=head2  create_tree
278
279 Title   : create_tree
280 Usage   :
281        $inputfilename = '/Users/daniel/nuc.phy';
282        @trees = $factory->create_tree($inputfilename);
283 Function: Create one or more LVB trees from a SimpleAlign
284           object or a file containing a Phylip 3.6-format
285           nucleotide multiple alignment.
286 Example :
287 Returns : Array of L<Bio::Tree> objects
288 Args    : Name of a file containing a nucleotide multiple
289           alignment in Phylip 3.6 format, or a SimpleAlign
290           object
291
292=cut
293
294sub create_tree{
295  return shift->run(@_);
296}
297
298#################################################
299
300=head2  _run
301
302 Title   : _run
303 Usage   : Internal function, not to be called directly
304 Function:  makes actual system call to lvb program
305 Example :
306 Returns : Array of Bio::Tree objects
307 Args    : Name of a file containing a multiple alignment
308           in Phylip 3.6 format and a parameter string to be
309           passed to LVB
310
311=cut
312
313sub _run {
314    my ($self,$infile,$param_string) = @_;
315    return unless(  $self->executable );
316
317    my $instring;
318    my $curpath = cwd;
319    unless( File::Spec->file_name_is_absolute($infile) ) {
320	$infile = $self->io->catfile($curpath,$infile);
321    }
322    $instring =  $param_string;
323    $self->debug( "Program ".$self->executable || ''."\n");
324
325    # create LVB's working copy of the input file, which must be named "infile"
326    # NOTE, we cut trailing spaces since they can cause trouble with LVB 2.1
327    my $lvb_infile = $self->tempdir . "/infile";
328    open(LVB_SUB_RUN_TMP_IN_FH, "$infile");
329    open(LVB_SUB_RUN_TMP_OUT_FH, ">$lvb_infile");
330    while (<LVB_SUB_RUN_TMP_IN_FH>) {
331	s/ +$//;
332        print LVB_SUB_RUN_TMP_OUT_FH
333	    or $self->throw("output error on $lvb_infile");
334    }
335    chdir($self->tempdir);
336    #open a pipe to run lvb to bypass interactive menus
337    if ($self->quiet() || $self->verbose() < 0) {
338	my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
339    	open(LVB_PIPE,"|".$self->executable.">$null");
340    }
341    else {
342    	open(LVB_PIPE,"|".$self->executable);
343    }
344    print LVB_PIPE $instring;
345    close(LVB_PIPE);
346    chdir($curpath);
347    #get the results
348    my $treefile = $self->tempdir . "/outtree";
349
350    $self->throw("LVB did not create treefile correctly")
351  	unless (-e $treefile);
352
353    #create the trees
354    my $in  = Bio::TreeIO->new(-file => $treefile, '-format' => 'newick');
355    my @trees = ();
356    while (my $tree = $in->next_tree()) {
357	push @trees, $tree;
358    }
359
360    unless ( $self->save_tempfiles ) {
361	# Clean up the temporary files created along the way...
362	unlink $lvb_infile;
363	unlink $treefile;
364    }
365    return @trees;
366}
367
368=head2  _setinput
369
370 Title   :  _setinput
371 Usage   :  Internal function, not to be called directly
372 Function:   Create input file for lvb program
373 Example :
374 Returns : name of file containing a multiple alignment in
375           Phylip 3.6 format
376 Args    : SimpleAlign object reference or input file name
377
378=cut
379
380sub _setinput {
381    my ($self, $input, $suffix) = @_;
382    my ($alnfilename,$infilename, $temp, $tfh,$input_tmp,$input_fh);
383
384    # If $input is not a  reference it better be the name of a
385    # file with the sequence/
386
387    #  a phy formatted alignment file
388    unless (ref $input) {
389        # check that file exists or throw
390        $alnfilename= $input;
391        unless (-e $input) {return 0;}
392	return $alnfilename;
393    }
394
395    #  $input may be a SimpleAlign Object
396    if ($input->isa("Bio::Align::AlignI")) {
397        #  Open temporary file for both reading & writing of BioSeq array
398	($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
399	my $alnIO = Bio::AlignIO->new(-fh => $tfh, -format=>'phylip',idlength=>$10);
400	$alnIO->write_aln($input);
401	$alnIO->close();
402	close($tfh);
403	$tfh = undef;
404	unless ($self->format() =~ /^interleaved$/i) {
405	    $self->warn("resetting LVB format to interleaved");
406	    $self->format("interleaved");
407	}
408	return $alnfilename;
409    }
410    return 0;
411}
412
413=head2  _setparams
414
415 Title   :  _setparams
416 Usage   :  Internal function, not to be called directly
417 Function:   Create parameter inputs for lvb program
418 Example :
419 Returns : parameter string to be passed to LVB
420 Args    : name of calling object
421
422=cut
423
424sub _setparams {
425    my ($attr, $value, $self);
426
427    $self = shift;
428    my $param_string = "";
429
430    for $attr (@LVB_PARAMS) {
431        $value = $self->$attr();
432	if ($attr =~/SEED/i) {
433	    $value = "" unless defined $value;
434	    $param_string .= "$value\n";
435	} elsif ($attr  =~ /BOOTSTRAPS/i) {
436	    $value = 0 unless defined $value;
437	    $param_string .= "$value\n";
438	} else {	# we want I for "interleaved" or S for "sequential",
439                        # U for "unknown" or F for "fifthstate",
440			# F for "fast" or S for "slow"
441	    $param_string .= uc(substr $value, 0, 1) . "\n";
442	}
443    }
444
445    return $param_string;
446}
447
4481; # Needed to keep compiler happy
449