1# 2# Copyright Balamurugan Kumarasamy 3# 4# You may distribute this module under the same terms as perl itself 5# POD documentation - main docs before the code 6 7=head1 NAME 8 9Bio::Tools::Run::Alignment::Blat 10 11=head1 SYNOPSIS 12 13 use Bio::Tools::Run::Alignment::Blat; 14 my $factory = Bio::Tools::Run::Alignment::Blat->new(-db => $database); 15 # $report is a Bio::SearchIO-compliant object 16 my $report = $factory->run($seqobj); 17 18=head1 DESCRIPTION 19 20Wrapper module for Blat program. This newer version allows for all parameters to 21be set by passing them as an option to new(). 22 23Key bits not implemented yet (TODO): 24 25=over 3 26 27=item * Implement all needed L<Bio::Tools::Run::WrapperBase> methods 28 29Missing are a few, including version(). 30 31=item * Re-implement using L<IPC::Run> 32 33Would like to get this running under something less reliant on OS-dependent 34changes within code. 35 36=item * No .2bit or .nib conversions yet 37 38These require callouts to faToNib or faTwoTwoBit, which may or may not be 39installed on a user's machine. We can possibly add functionality to 40check for faToTwoBit/faToNib and other UCSC tools in the future. 41 42=back 43 44=head1 FEEDBACK 45 46=head2 Mailing Lists 47 48User feedback is an integral part of the evolution of this and other 49Bioperl modules. Send your comments and suggestions preferably to one 50of the Bioperl mailing lists. Your participation is much appreciated. 51 52 bioperl-l@bioperl.org - General discussion 53 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 54 55=head2 Support 56 57Please direct usage questions or support issues to the mailing list: 58 59I<bioperl-l@bioperl.org> 60 61rather than to the module maintainer directly. Many experienced and 62reponsive experts will be able look at the problem and quickly 63address it. Please include a thorough description of the problem 64with code and data examples if at all possible. 65 66=head2 Reporting Bugs 67 68Report bugs to the Bioperl bug tracking system to help us keep track 69the bugs and their resolution. Bug reports can be submitted via the 70web: 71 72 http://redmine.open-bio.org/projects/bioperl/ 73 74=head1 AUTHOR 75 76 Chris Fields - cjfields at bioperl dot org 77 78 Original author - Bala Email bala@tll.org.sg 79 80=head1 APPENDIX 81 82The rest of the documentation details each of the object 83methods. Internal methods are usually preceded with a _ 84 85=cut 86 87 88package Bio::Tools::Run::Alignment::Blat; 89 90use strict; 91use warnings; 92use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); 93 94use Bio::SeqIO; 95use Bio::Root::Root; 96use Bio::Factory::ApplicationFactoryI; 97use Bio::SearchIO; 98use Bio::Tools::Run::WrapperBase; 99 100our ($PROGRAM, $PROGRAMDIR, $PROGRAMNAME); 101 102our %BLAT_PARAMS = map {$_ => 1} qw(ooc t q tileSize stepSize oneOff 103 minMatch minScore minIdentity maxGap makeOoc repmatch mask qMask repeats 104 minRepeatsDivergence dots out maxIntron); 105 106our %BLAT_SWITCHES = map {$_ => 1} qw(prot noHead trimT noTrimA trimHardA 107 fastMap fine extendThroughN); 108 109our %LOCAL_ATTRIBUTES = map {$_ => 1} qw(db DB qsegment hsegment searchio 110 outfile_name quiet); 111 112our %searchio_map = ( 113 'psl' => 'psl', 114 'pslx' => 'psl', # I don't think there is support for this yet 115 'axt' => 'axt', 116 'blast' => 'blast', 117 'sim4' => 'sim4', 118 'wublast' => 'blast', 119 'blast8' => 'blasttable', 120 'blast9' => 'blasttable' 121); 122 123 124=head2 new 125 126 Title : new 127 Usage : $blat->new( -db => '' ) 128 Function: Create a new Blat factory 129 Returns : A new Bio::Tools::Run::Alignment::Blat object 130 Args : -db : Mandatory parameter. See db() 131 -qsegment : see qsegment() 132 -tsegment : see tsegment() 133 Also, Blat parameters and flags are accepted: -t, -q, -minIdentity, 134 -minScore, -out, -trimT, ... 135 See Blat's manual for details. 136 137=cut 138 139sub new { 140 my ($class, @args) = @_; 141 my $self = $class->SUPER::new(@args); 142 $self->io->_initialize_io(); 143 $self->set_parameters(@args); 144 return $self; 145} 146 147 148=head2 program_name 149 150 Title : program_name 151 Usage : $factory->program_name() 152 Function: Get the program name 153 Returns : string 154 Args : None 155 156=cut 157 158sub program_name { 159 return 'blat'; 160} 161 162 163=head2 program_dir 164 165 Title : program_dir 166 Usage : $factory->program_dir(@params) 167 Function: returns the program directory, obtained from ENV variable. 168 Returns : string 169 Args : 170 171=cut 172 173sub program_dir { 174 return Bio::Root::IO->catfile($ENV{BLATDIR}) if $ENV{BLATDIR}; 175} 176 177 178=head2 run 179 180 Title : run() 181 Usage : $obj->run($query) 182 Function: Run Blat. 183 Returns : A Bio::SearchIO object that holds the results 184 Args : A Bio::PrimarySeqI object or a file of query sequences 185 186=cut 187 188sub run { 189 my ($self, $query) = @_; 190 if (ref($query) ) { # it is an object 191 if (ref($query) =~ /GLOB/) { 192 $self->throw("Cannot use filehandle as argument to run()"); 193 } 194 $query = $self->_writeSeqFile($query); 195 } 196 return $self->_run($query); 197} 198 199 200=head2 align 201 202 Title : align 203 Usage : $obj->align($query) 204 Function: Alias to run() 205 206=cut 207 208sub align { 209 return shift->run(@_); 210} 211 212 213=head2 db 214 215 Title : db 216 Usage : $obj->db() 217 Function: Get or set the file of database sequences (.fa , .nib or .2bit) 218 Returns : Database filename 219 Args : Database filename 220 221=cut 222 223sub db { 224 my $self = shift; 225 return $self->{blat_db} = shift if @_; 226 return $self->{blat_db}; 227} 228 229# this is a kludge for tests (so one might expect this to be used elsewhere). 230# None of the other parameters worked in the past, so not replacing them 231 232*DB = \&db; 233 234 235=head2 qsegment 236 237 Title : qsegment 238 Usage : $obj->qsegment('sequence_a:0-1000') 239 Function : pass in a B<UCSC-compliant> string for the query sequence(s) 240 Returns : string 241 Args : string 242 Note : Requires the sequence(s) in question be 2bit or nib format 243 Reminder : UCSC segment/regions coordinates are 0-based half-open (sequence 244 begins at 0, but start isn't counted with length), whereas BioPerl 245 coordinates are 1-based closed (sequence begins with 1, both start 246 and end are counted in the length of the segment). For example, a 247 segment that is 'sequence_a:0-1000' will have BioPerl coordinates of 248 'sequence_a:1-1000', both with the same length (1000). 249 250=cut 251 252sub qsegment { 253 my $self = shift; 254 return $self->{blat_qsegment} = shift if @_; 255 return $self->{blat_qsegment}; 256} 257 258 259=head2 tsegment 260 261 Title : tsegment 262 Usage : $obj->tsegment('sequence_a:0-1000') 263 Function : pass in a B<UCSC-compliant> string for the target sequence(s) 264 Returns : string 265 Args : string 266 Note : Requires the sequence(s) in question be 2bit or nib format 267 Reminder : UCSC segment/regions coordinates are 0-based half-open (sequence 268 begins at 0, but start isn't counted with length), whereas BioPerl 269 coordinates are 1-based closed (sequence begins with 1, both start 270 and end are counted in the length of the segment). For example, a 271 segment that is 'sequence_a:0-1000' will have BioPerl coordinates of 272 'sequence_a:1-1000', both with the same length (1000). 273 274=cut 275 276sub tsegment { 277 my $self = shift; 278 return $self->{blat_tsegment} = shift if @_; 279 return $self->{blat_tsegment}; 280} 281 282 283=head2 outfile_name 284 285 Title : outfile_name 286 Usage : $obj->outfile_name('out.blat') 287 Function : Get or set the name for the BLAT output file 288 Returns : string 289 Args : string 290 291=cut 292 293# override this, otherwise one gets a default of 'mlc' 294sub outfile_name { 295 my $self = shift; 296 return $self->{blat_outfile} = shift if @_; 297 return $self->{blat_outfile}; 298} 299 300 301=head2 searchio 302 303 Title : searchio 304 Usage : $obj->searchio{-writer => $writer} 305 Function : Pass in additional parameters to the returned Bio::SearchIO parser 306 Returns : Hash reference with Bio::SearchIO parameters 307 Args : Hash reference 308 Note : Currently, this implementation overrides any passed -format 309 parameter based on whether the output is changed ('out'). This 310 may change if requested, but we can't see the utility of doing so, 311 as requesting mismatched output/parser combinations is just a recipe 312 for disaster 313 314=cut 315 316sub searchio { 317 my ($self, $params) = @_; 318 if ($params && ref $params eq 'HASH') { 319 delete $params->{-format}; 320 $self->{blat_searchio} = $params; 321 } 322 return $self->{blat_searchio} || {}; 323} 324 325 326=head1 Bio::ParameterBaseI-specific methods 327 328These methods are part of the Bio::ParameterBaseI interface 329 330=head2 set_parameters 331 332 Title : set_parameters 333 Usage : $pobj->set_parameters(%params); 334 Function: sets the parameters listed in the hash or array 335 Returns : None 336 Args : [optional] hash or array of parameter/values. These can optionally 337 be hash or array references 338 Note : This only sets parameters; to set methods use the method name 339 340=cut 341 342sub set_parameters { 343 my $self = shift; 344 # circumvent any issues arising from passing in refs 345 my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : 346 (ref($_[0]) eq 'ARRAY') ? @{$_[0]} : 347 @_; 348 # set the parameters passed in, but only ones supported for the program 349 %args = map { my $a = $_; 350 $a =~ s{^-}{}; 351 $a => $args{$_}; 352 } sort keys %args; 353 354 while (my ($key, $val) = each %args) { 355 if (exists $BLAT_PARAMS{$key}) { 356 $self->{parameters}->{$key} = $val; 357 } elsif (exists $BLAT_SWITCHES{$key}) { 358 $self->{parameters}->{$key} = $BLAT_SWITCHES{$key} ? 1 : 0; 359 } elsif ($LOCAL_ATTRIBUTES{$key} && $self->can($key)) { 360 $self->$key($val); 361 } 362 } 363} 364 365 366=head2 reset_parameters 367 368 Title : reset_parameters 369 Usage : resets values 370 Function: resets parameters to either undef or value in passed hash 371 Returns : none 372 Args : [optional] hash of parameter-value pairs 373 374=cut 375 376sub reset_parameters { 377 my $self = shift; 378 delete $self->{parameters}; 379 if (@_) { 380 $self->set_parameters(@_); 381 } 382} 383 384 385=head2 validate_parameters 386 387 Title : validate_parameters 388 Usage : $pobj->validate_parameters(1); 389 Function: sets a flag indicating whether to validate parameters via 390 set_parameters() or reset_parameters() 391 Returns : Bool 392 Args : [optional] value evaluating to True/False 393 Note : NYI 394 395=cut 396 397sub validate_parameters { 0 } 398 399 400=head2 parameters_changed 401 402 Title : parameters_changed 403 Usage : if ($pobj->parameters_changed) {...} 404 Function: Returns boolean true (1) if parameters have changed 405 Returns : Boolean (0 or 1) 406 Args : None 407 Note : This module does not run state checks, so this always returns True 408 409=cut 410 411sub parameters_changed { 1 } 412 413 414=head2 available_parameters 415 416 Title : available_parameters 417 Usage : @params = $pobj->available_parameters() 418 Function: Returns a list of the available parameters 419 Returns : Array of parameters 420 Args : [optional] name of executable being used; defaults to returning all 421 available parameters 422 423=cut 424 425sub available_parameters { 426 my ($self, $exec) = @_; 427 my @params = (sort keys %BLAT_PARAMS, sort keys %BLAT_SWITCHES); 428 return @params; 429} 430 431 432=head2 get_parameters 433 434 Title : get_parameters 435 Usage : %params = $pobj->get_parameters; 436 Function: Returns list of set key-value pairs, parameter => value 437 Returns : List of key-value pairs 438 Args : none 439 440=cut 441 442sub get_parameters { 443 my ($self, $option) = @_; 444 $option ||= ''; # no option 445 my %params; 446 if (exists $self->{parameters}) { 447 %params = map {$_ => $self->{parameters}->{$_}} sort keys %{$self->{parameters}}; 448 } else { 449 %params = (); 450 } 451 return %params; 452} 453 454 455=head1 to_* methods 456 457All to_* methods are implementation-specific 458 459=head2 to_exe_string 460 461 Title : to_exe_string 462 Usage : $string = $pobj->to_exe_string; 463 Function: Returns string (command line string in this case) 464 Returns : String 465 Args : 466 467=cut 468 469sub to_exe_string { 470 my ($self, @passed) = @_; 471 my ($seq) = $self->_rearrange([qw(SEQ_FILE)], @passed); 472 $self->throw("Must provide a seq_file") unless defined $seq; 473 my %params = $self->get_parameters(); 474 475 my ($exe, $db, $qseg, $tseg) = ($self->executable, 476 $self->db, 477 $self->qsegment, 478 $self->tsegment); 479 480 $self->throw("Executable not found") unless defined($exe); 481 482 if ($tseg) { 483 $db .= ":$tseg"; 484 } 485 486 if ($qseg) { 487 $seq .= ":$qseg"; 488 } 489 490 my @params; 491 492 for my $p (sort keys %BLAT_SWITCHES) { 493 if (exists $params{$p}) { 494 push @params, "-$p" 495 } 496 } 497 498 for my $p (sort keys %BLAT_PARAMS) { 499 if (exists $params{$p}) { 500 push @params, "-$p=$params{$p}" 501 } 502 } 503 504 # this only passes in the first seq file (no globs are allow AFAIK) 505 506 push @params, ($db, $seq); 507 508 # quiet! Unfortunately, it is NYI 509 510 my $string = "$exe ".join(' ',@params); 511 512 return $string; 513} 514 515 516#=head2 _input 517# 518# Title : _input 519# Usage : obj->_input($seqFile) 520# Function: Internal (not to be used directly) 521# Returns : 522# Args : 523# 524#=cut 525 526sub _input() { 527 my ($self,$infile1) = @_; 528 if (defined $infile1) { 529 $self->{'input'} = $infile1; 530 } 531 return $self->{'input'}; 532} 533 534 535#=head2 _database 536# 537# Title : _database 538# Usage : obj->_database($seqFile) 539# Function: Internal (not to be used directly) 540# Returns : 541# Args : 542# 543#=cut 544 545sub _database() { 546 my ($self,$infile1) = @_; 547 $self->{'db'} = $infile1 if(defined $infile1); 548 return $self->{'db'}; 549} 550 551 552#=head2 _run 553# 554# Title : _run 555# Usage : $obj->_run() 556# Function: Internal (not to be used directly) 557# Returns : A Bio::SearchIO object that contains the results 558# Args : File of sequences 559# 560#=cut 561 562sub _run { 563 my ($self, $seq_file) = @_; 564 my $str = $self->to_exe_string(-seq_file => $seq_file); 565 566 my $out = $self->outfile_name || $self->_tempfile; 567 568 $str .= " $out".$self->_quiet; 569 $self->debug($str."\n") if( $self->verbose > 0 ); 570 571 my %params = $self->get_parameters; 572 573 my $status = system($str); 574 $self->throw( "Blat call ($str) crashed: $? \n") unless $status==0; 575 576 my $format = exists($params{out}) ? 577 $searchio_map{$params{out}} : 'psl'; 578 579 my @io = ref ($out) !~ /GLOB/ ? (-file => $out,) : (-fh => $out,); 580 my $blat_obj = Bio::SearchIO->new(%{$self->searchio}, 581 @io, 582 -query_type => $params{prot} ? 'protein' : 583 $params{q} || 'dna', 584 -hit_type => $params{prot} ? 'protein' : 585 $params{t} || 'dna', 586 -format => $format); 587 return $blat_obj; 588} 589 590 591#=head2 _writeSeqFile 592# 593# Title : _writeSeqFile 594# Usage : obj->_writeSeqFile($seq) 595# Function: Internal (not to be used directly) 596# Returns : 597# Args : 598# 599#=cut 600 601sub _writeSeqFile { 602 my ($self,$seq) = @_; 603 my ($tfh,$inputfile) = $self->io->tempfile(-dir=>$Bio::Root::IO::TEMPDIR); 604 my $in = Bio::SeqIO->new(-fh => $tfh , '-format' => 'fasta'); 605 $in->write_seq($seq); 606 $in->close(); 607 return $inputfile; 608} 609 610 611sub _tempfile { 612 my $self = shift; 613 my ($tfh,$outfile) = $self->io->tempfile(-dir=>$Bio::Root::IO::TEMPDIR); 614 # this is because we only want a unique filename 615 close($tfh); 616 return $outfile; 617} 618 619 620sub _quiet { 621 my $self = shift; 622 my $q = ''; 623 # BLAT output goes to a file, all other output is STDOUT 624 if ($self->quiet) { 625 $q = $^O =~ /Win/i ? ' 2>&1 NUL' : ' > /dev/null 2>&1'; 626 } 627 return $q; 628} 629 6301; 631