1#
2# Copyright Balamurugan Kumarasamy
3#
4# You may distribute this module under the same terms as perl itself
5# POD documentation - main docs before the code
6
7=head1 NAME
8
9Bio::Tools::Run::Alignment::Blat
10
11=head1 SYNOPSIS
12
13 use Bio::Tools::Run::Alignment::Blat;
14 my $factory = Bio::Tools::Run::Alignment::Blat->new(-db => $database);
15 # $report is a Bio::SearchIO-compliant object
16 my $report = $factory->run($seqobj);
17
18=head1 DESCRIPTION
19
20Wrapper module for Blat program. This newer version allows for all parameters to
21be set by passing them as an option to new().
22
23Key bits not implemented yet (TODO):
24
25=over 3
26
27=item * Implement all needed L<Bio::Tools::Run::WrapperBase> methods
28
29Missing are a few, including version().
30
31=item * Re-implement using L<IPC::Run>
32
33Would like to get this running under something less reliant on OS-dependent
34changes within code.
35
36=item * No .2bit or .nib conversions yet
37
38These require callouts to faToNib or faTwoTwoBit, which may or may not be
39installed on a user's machine.  We can possibly add functionality to
40check for faToTwoBit/faToNib and other UCSC tools in the future.
41
42=back
43
44=head1 FEEDBACK
45
46=head2 Mailing Lists
47
48User feedback is an integral part of the evolution of this and other
49Bioperl modules. Send your comments and suggestions preferably to one
50of the Bioperl mailing lists.  Your participation is much appreciated.
51
52  bioperl-l@bioperl.org                  - General discussion
53  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
54
55=head2 Support
56
57Please direct usage questions or support issues to the mailing list:
58
59I<bioperl-l@bioperl.org>
60
61rather than to the module maintainer directly. Many experienced and
62reponsive experts will be able look at the problem and quickly
63address it. Please include a thorough description of the problem
64with code and data examples if at all possible.
65
66=head2 Reporting Bugs
67
68Report bugs to the Bioperl bug tracking system to help us keep track
69the bugs and their resolution.  Bug reports can be submitted via the
70web:
71
72  http://redmine.open-bio.org/projects/bioperl/
73
74=head1 AUTHOR
75
76 Chris Fields - cjfields at bioperl dot org
77
78 Original author - Bala Email bala@tll.org.sg
79
80=head1 APPENDIX
81
82The rest of the documentation details each of the object
83methods. Internal methods are usually preceded with a _
84
85=cut
86
87
88package Bio::Tools::Run::Alignment::Blat;
89
90use strict;
91use warnings;
92use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
93
94use Bio::SeqIO;
95use Bio::Root::Root;
96use Bio::Factory::ApplicationFactoryI;
97use Bio::SearchIO;
98use Bio::Tools::Run::WrapperBase;
99
100our ($PROGRAM, $PROGRAMDIR, $PROGRAMNAME);
101
102our %BLAT_PARAMS = map {$_ => 1} qw(ooc t q tileSize stepSize oneOff
103    minMatch minScore minIdentity maxGap makeOoc repmatch mask qMask repeats
104    minRepeatsDivergence dots out maxIntron);
105
106our %BLAT_SWITCHES = map {$_ => 1} qw(prot noHead trimT noTrimA trimHardA
107                                    fastMap fine extendThroughN);
108
109our %LOCAL_ATTRIBUTES = map {$_ => 1} qw(db DB qsegment hsegment searchio
110                                    outfile_name quiet);
111
112our %searchio_map = (
113    'psl'     => 'psl',
114    'pslx'    => 'psl', # I don't think there is support for this yet
115    'axt'     => 'axt',
116    'blast'   => 'blast',
117    'sim4'    => 'sim4',
118    'wublast' => 'blast',
119    'blast8'  => 'blasttable',
120    'blast9'  => 'blasttable'
121);
122
123
124=head2 new
125
126 Title   : new
127 Usage   : $blat->new( -db => '' )
128 Function: Create a new Blat factory
129 Returns : A new Bio::Tools::Run::Alignment::Blat object
130 Args    : -db       : Mandatory parameter. See db()
131           -qsegment : see qsegment()
132           -tsegment : see tsegment()
133           Also, Blat parameters and flags are accepted: -t, -q, -minIdentity,
134              -minScore, -out, -trimT, ...
135           See Blat's manual for details.
136
137=cut
138
139sub new {
140    my ($class, @args) = @_;
141    my $self = $class->SUPER::new(@args);
142    $self->io->_initialize_io();
143    $self->set_parameters(@args);
144    return $self;
145}
146
147
148=head2 program_name
149
150 Title   : program_name
151 Usage   : $factory->program_name()
152 Function: Get the program name
153 Returns : string
154 Args    : None
155
156=cut
157
158sub program_name {
159    return 'blat';
160}
161
162
163=head2 program_dir
164
165 Title   : program_dir
166 Usage   : $factory->program_dir(@params)
167 Function: returns the program directory, obtained from ENV variable.
168 Returns : string
169 Args    :
170
171=cut
172
173sub program_dir {
174    return Bio::Root::IO->catfile($ENV{BLATDIR}) if $ENV{BLATDIR};
175}
176
177
178=head2 run
179
180 Title   : run()
181 Usage   : $obj->run($query)
182 Function: Run Blat.
183 Returns : A Bio::SearchIO object that holds the results
184 Args    : A Bio::PrimarySeqI object or a file of query sequences
185
186=cut
187
188sub run {
189    my ($self, $query) = @_;
190    if  (ref($query) ) {  # it is an object
191        if (ref($query) =~ /GLOB/) {
192            $self->throw("Cannot use filehandle as argument to run()");
193        }
194        $query = $self->_writeSeqFile($query);
195    }
196    return $self->_run($query);
197}
198
199
200=head2 align
201
202 Title   : align
203 Usage   : $obj->align($query)
204 Function: Alias to run()
205
206=cut
207
208sub align {
209    return shift->run(@_);
210}
211
212
213=head2 db
214
215 Title   : db
216 Usage   : $obj->db()
217 Function: Get or set the file of database sequences (.fa , .nib or .2bit)
218 Returns : Database filename
219 Args    : Database filename
220
221=cut
222
223sub db {
224    my $self = shift;
225    return $self->{blat_db} = shift if @_;
226    return $self->{blat_db};
227}
228
229# this is a kludge for tests (so one might expect this to be used elsewhere).
230# None of the other parameters worked in the past, so not replacing them
231
232*DB = \&db;
233
234
235=head2 qsegment
236
237 Title    : qsegment
238 Usage    : $obj->qsegment('sequence_a:0-1000')
239 Function : pass in a B<UCSC-compliant> string for the query sequence(s)
240 Returns  : string
241 Args     : string
242 Note     : Requires the sequence(s) in question be 2bit or nib format
243 Reminder : UCSC segment/regions coordinates are 0-based half-open (sequence
244            begins at 0, but start isn't counted with length), whereas BioPerl
245            coordinates are 1-based closed (sequence begins with 1, both start
246            and end are counted in the length of the segment). For example, a
247            segment that is 'sequence_a:0-1000' will have BioPerl coordinates of
248            'sequence_a:1-1000', both with the same length (1000).
249
250=cut
251
252sub qsegment {
253    my $self = shift;
254    return $self->{blat_qsegment} = shift if @_;
255    return $self->{blat_qsegment};
256}
257
258
259=head2 tsegment
260
261 Title    : tsegment
262 Usage    : $obj->tsegment('sequence_a:0-1000')
263 Function : pass in a B<UCSC-compliant> string for the target sequence(s)
264 Returns  : string
265 Args     : string
266 Note     : Requires the sequence(s) in question be 2bit or nib format
267 Reminder : UCSC segment/regions coordinates are 0-based half-open (sequence
268            begins at 0, but start isn't counted with length), whereas BioPerl
269            coordinates are 1-based closed (sequence begins with 1, both start
270            and end are counted in the length of the segment). For example, a
271            segment that is 'sequence_a:0-1000' will have BioPerl coordinates of
272            'sequence_a:1-1000', both with the same length (1000).
273
274=cut
275
276sub tsegment {
277    my $self = shift;
278    return $self->{blat_tsegment} = shift if @_;
279    return $self->{blat_tsegment};
280}
281
282
283=head2 outfile_name
284
285 Title    : outfile_name
286 Usage    : $obj->outfile_name('out.blat')
287 Function : Get or set the name for the BLAT output file
288 Returns  : string
289 Args     : string
290
291=cut
292
293# override this, otherwise one gets a default of 'mlc'
294sub outfile_name {
295    my $self = shift;
296    return $self->{blat_outfile} = shift if @_;
297    return $self->{blat_outfile};
298}
299
300
301=head2 searchio
302
303 Title    : searchio
304 Usage    : $obj->searchio{-writer => $writer}
305 Function : Pass in additional parameters to the returned Bio::SearchIO parser
306 Returns  : Hash reference with Bio::SearchIO parameters
307 Args     : Hash reference
308 Note     : Currently, this implementation overrides any passed -format
309            parameter based on whether the output is changed ('out').  This
310            may change if requested, but we can't see the utility of doing so,
311            as requesting mismatched output/parser combinations is just a recipe
312            for disaster
313
314=cut
315
316sub searchio {
317    my ($self, $params) = @_;
318    if ($params && ref $params eq 'HASH') {
319        delete $params->{-format};
320        $self->{blat_searchio} = $params;
321    }
322    return $self->{blat_searchio} || {};
323}
324
325
326=head1 Bio::ParameterBaseI-specific methods
327
328These methods are part of the Bio::ParameterBaseI interface
329
330=head2 set_parameters
331
332 Title   : set_parameters
333 Usage   : $pobj->set_parameters(%params);
334 Function: sets the parameters listed in the hash or array
335 Returns : None
336 Args    : [optional] hash or array of parameter/values.  These can optionally
337           be hash or array references
338 Note    : This only sets parameters; to set methods use the method name
339
340=cut
341
342sub set_parameters {
343    my $self = shift;
344    # circumvent any issues arising from passing in refs
345    my %args = (ref($_[0]) eq 'HASH')  ? %{$_[0]} :
346               (ref($_[0]) eq 'ARRAY') ? @{$_[0]} :
347               @_;
348    # set the parameters passed in, but only ones supported for the program
349    %args = map { my $a = $_;
350              $a =~ s{^-}{};
351              $a => $args{$_};
352                 } sort keys %args;
353
354    while (my ($key, $val) = each %args) {
355        if (exists $BLAT_PARAMS{$key}) {
356            $self->{parameters}->{$key} = $val;
357        } elsif (exists $BLAT_SWITCHES{$key}) {
358            $self->{parameters}->{$key} = $BLAT_SWITCHES{$key} ? 1 : 0;
359        } elsif ($LOCAL_ATTRIBUTES{$key} && $self->can($key)) {
360            $self->$key($val);
361        }
362    }
363}
364
365
366=head2 reset_parameters
367
368 Title   : reset_parameters
369 Usage   : resets values
370 Function: resets parameters to either undef or value in passed hash
371 Returns : none
372 Args    : [optional] hash of parameter-value pairs
373
374=cut
375
376sub reset_parameters {
377    my $self = shift;
378    delete $self->{parameters};
379    if (@_) {
380        $self->set_parameters(@_);
381    }
382}
383
384
385=head2 validate_parameters
386
387 Title   : validate_parameters
388 Usage   : $pobj->validate_parameters(1);
389 Function: sets a flag indicating whether to validate parameters via
390           set_parameters() or reset_parameters()
391 Returns : Bool
392 Args    : [optional] value evaluating to True/False
393 Note    : NYI
394
395=cut
396
397sub validate_parameters { 0 }
398
399
400=head2 parameters_changed
401
402 Title   : parameters_changed
403 Usage   : if ($pobj->parameters_changed) {...}
404 Function: Returns boolean true (1) if parameters have changed
405 Returns : Boolean (0 or 1)
406 Args    : None
407 Note    : This module does not run state checks, so this always returns True
408
409=cut
410
411sub parameters_changed { 1 }
412
413
414=head2 available_parameters
415
416 Title   : available_parameters
417 Usage   : @params = $pobj->available_parameters()
418 Function: Returns a list of the available parameters
419 Returns : Array of parameters
420 Args    : [optional] name of executable being used; defaults to returning all
421           available parameters
422
423=cut
424
425sub available_parameters {
426    my ($self, $exec) = @_;
427    my @params = (sort keys %BLAT_PARAMS, sort keys %BLAT_SWITCHES);
428    return @params;
429}
430
431
432=head2 get_parameters
433
434 Title   : get_parameters
435 Usage   : %params = $pobj->get_parameters;
436 Function: Returns list of set key-value pairs, parameter => value
437 Returns : List of key-value pairs
438 Args    : none
439
440=cut
441
442sub get_parameters {
443    my ($self, $option) = @_;
444    $option ||= ''; # no option
445    my %params;
446    if (exists $self->{parameters}) {
447        %params = map {$_ => $self->{parameters}->{$_}} sort keys %{$self->{parameters}};
448    } else {
449        %params = ();
450    }
451    return %params;
452}
453
454
455=head1 to_* methods
456
457All to_* methods are implementation-specific
458
459=head2 to_exe_string
460
461 Title   : to_exe_string
462 Usage   : $string = $pobj->to_exe_string;
463 Function: Returns string (command line string in this case)
464 Returns : String
465 Args    :
466
467=cut
468
469sub to_exe_string {
470    my ($self, @passed) = @_;
471    my ($seq) = $self->_rearrange([qw(SEQ_FILE)], @passed);
472    $self->throw("Must provide a seq_file") unless defined $seq;
473    my %params = $self->get_parameters();
474
475    my ($exe, $db, $qseg, $tseg) = ($self->executable,
476                                   $self->db,
477                                   $self->qsegment,
478                                   $self->tsegment);
479
480    $self->throw("Executable not found") unless defined($exe);
481
482    if ($tseg) {
483        $db .= ":$tseg";
484    }
485
486    if ($qseg) {
487        $seq .= ":$qseg";
488    }
489
490    my @params;
491
492    for my $p (sort keys %BLAT_SWITCHES) {
493        if (exists $params{$p}) {
494            push @params, "-$p"
495        }
496    }
497
498    for my $p (sort keys %BLAT_PARAMS) {
499        if (exists $params{$p}) {
500            push @params, "-$p=$params{$p}"
501        }
502    }
503
504    # this only passes in the first seq file (no globs are allow AFAIK)
505
506    push @params, ($db, $seq);
507
508    # quiet! Unfortunately, it is NYI
509
510    my $string = "$exe ".join(' ',@params);
511
512    return $string;
513}
514
515
516#=head2 _input
517#
518# Title   : _input
519# Usage   : obj->_input($seqFile)
520# Function: Internal (not to be used directly)
521# Returns :
522# Args    :
523#
524#=cut
525
526sub _input() {
527    my ($self,$infile1) = @_;
528    if (defined $infile1) {
529        $self->{'input'} = $infile1;
530    }
531    return $self->{'input'};
532}
533
534
535#=head2 _database
536#
537# Title   : _database
538# Usage   : obj->_database($seqFile)
539# Function: Internal (not to be used directly)
540# Returns :
541# Args    :
542#
543#=cut
544
545sub _database() {
546    my ($self,$infile1) = @_;
547    $self->{'db'} = $infile1 if(defined $infile1);
548    return $self->{'db'};
549}
550
551
552#=head2 _run
553#
554# Title   : _run
555# Usage   : $obj->_run()
556# Function: Internal (not to be used directly)
557# Returns : A Bio::SearchIO object that contains the results
558# Args    : File of sequences
559#
560#=cut
561
562sub _run {
563    my ($self, $seq_file) = @_;
564    my $str = $self->to_exe_string(-seq_file => $seq_file);
565
566    my $out = $self->outfile_name || $self->_tempfile;
567
568    $str .= " $out".$self->_quiet;
569    $self->debug($str."\n") if( $self->verbose > 0 );
570
571    my %params = $self->get_parameters;
572
573    my $status = system($str);
574    $self->throw( "Blat call ($str) crashed: $? \n") unless $status==0;
575
576    my $format = exists($params{out}) ?
577                $searchio_map{$params{out}} : 'psl';
578
579    my @io = ref ($out) !~ /GLOB/ ? (-file    => $out,) : (-fh    => $out,);
580    my $blat_obj = Bio::SearchIO->new(%{$self->searchio},
581                                    @io,
582                                    -query_type => $params{prot} ? 'protein' :
583                                                    $params{q} || 'dna',
584                                    -hit_type   => $params{prot} ? 'protein' :
585                                                    $params{t} || 'dna',
586                                    -format => $format);
587    return $blat_obj;
588}
589
590
591#=head2 _writeSeqFile
592#
593# Title   : _writeSeqFile
594# Usage   : obj->_writeSeqFile($seq)
595# Function: Internal (not to be used directly)
596# Returns :
597# Args    :
598#
599#=cut
600
601sub _writeSeqFile {
602    my ($self,$seq) = @_;
603    my ($tfh,$inputfile) = $self->io->tempfile(-dir=>$Bio::Root::IO::TEMPDIR);
604    my $in  = Bio::SeqIO->new(-fh => $tfh , '-format' => 'fasta');
605    $in->write_seq($seq);
606    $in->close();
607    return $inputfile;
608}
609
610
611sub _tempfile {
612    my $self = shift;
613    my ($tfh,$outfile) = $self->io->tempfile(-dir=>$Bio::Root::IO::TEMPDIR);
614    # this is because we only want a unique filename
615    close($tfh);
616    return $outfile;
617}
618
619
620sub _quiet {
621    my $self = shift;
622    my $q = '';
623    # BLAT output goes to a file, all other output is STDOUT
624    if ($self->quiet) {
625        $q =  $^O =~ /Win/i ? ' 2>&1 NUL' : ' > /dev/null 2>&1';
626    }
627    return $q;
628}
629
6301;
631