1# 2# BioPerl module for Bio::Tools::Run::BEDTools 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Dan Kortschak <dan.kortschak@adelaide.edu.au> 7# 8# Copyright Dan Kortschak 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Tools::Run::BEDTools - Run wrapper for the BEDTools suite of programs *BETA* 17 18=head1 SYNOPSIS 19 20 # use a BEDTools program 21 $bedtools_fac = Bio::Tools::Run::BEDTools->new( -command => 'subtract' ); 22 $result_file = $bedtools_fac->run( -bed1 => 'genes.bed', -bed2 => 'mask.bed' ); 23 24 # if IO::Uncompress::Gunzip is available... 25 $result_file = $bedtools_fac->run( -bed1 => 'genes.bed.gz', -bed2 => 'mask.bed.gz' ); 26 27 # be more strict 28 $bedtools_fac->set_parameters( -strandedness => 1 ); 29 30 # and even more... 31 $bedtools_fac->set_parameters( -minimum_overlap => 1e-6 ); 32 33 # create a Bio::SeqFeature::Collection object 34 $features = $bedtools_fac->result( -want => 'Bio::SeqFeature::Collection' ); 35 36=head1 DEPRECATION WARNING 37 38Most executables from BEDTools v>=2.10.1 can read GFF and VCF formats 39in addition to BED format. This requires the use of a new input file param, 40shown in the following documentation, '-bgv', in place of '-bed' for the 41executables that can do this. 42 43This behaviour breaks existing scripts. 44 45=head1 DESCRIPTION 46 47This module provides a wrapper interface for Aaron R. Quinlan and Ira M. Hall's 48utilities C<BEDTools> that allow for (among other things): 49 50=over 51 52=item * Intersecting two BED files in search of overlapping features. 53 54=item * Merging overlapping features. 55 56=item * Screening for paired-end (PE) overlaps between PE sequences and existing genomic features. 57 58=item * Calculating the depth and breadth of sequence coverage across defined "windows" in a genome. 59 60=back 61 62(see L<http://code.google.com/p/bedtools/> for manuals and downloads). 63 64 65=head1 OPTIONS 66 67C<BEDTools> is a suite of 17 commandline executable. This module attempts to 68provide and options comprehensively. You can browse the choices like so: 69 70 $bedtools_fac = Bio::Tools::Run::BEDTools->new; 71 72 # all bowtie commands 73 @all_commands = $bedtools_fac->available_parameters('commands'); 74 @all_commands = $bedtools_fac->available_commands; # alias 75 76 # just for default command ('bam_to_bed') 77 @btb_params = $bedtools_fac->available_parameters('params'); 78 @btb_switches = $bedtools_fac->available_parameters('switches'); 79 @btb_all_options = $bedtools_fac->available_parameters(); 80 81Reasonably mnemonic names have been assigned to the single-letter 82command line options. These are the names returned by 83C<available_parameters>, and can be used in the factory constructor 84like typical BioPerl named parameters. 85 86As a number of options are mutually exclusive, and the interpretation of 87intent is based on last-pass option reaching bowtie with potentially unpredicted 88results. This module will prevent inconsistent switches and parameters 89from being passed. 90 91See L<http://code.google.com/p/bedtools/> for details of BEDTools options. 92 93=head1 FILES 94 95When a command requires filenames, these are provided to the C<run> method, not 96the constructor (C<new()>). To see the set of files required by a command, use 97C<available_parameters('filespec')> or the alias C<filespec()>: 98 99 $bedtools_fac = Bio::Tools::Run::BEDTools->new( -command => 'pair_to_bed' ); 100 @filespec = $bedtools_fac->filespec; 101 102This example returns the following array: 103 104 #bedpe 105 #bam 106 bed 107 #out 108 109This indicates that the bed (C<BEDTools> BED format) file MUST be 110specified, and that the out, bedpe (C<BEDTools> BEDPE format) and bam 111(C<SAM> binary format) file MAY be specified (Note that in this case you 112MUST provide ONE of bedpe OR bam, the module at this stage does not allow 113this information to be queried). Use these in the C<run> call like so: 114 115 $bedtools_fac->run( -bedpe => 'paired.bedpe', 116 -bgv => 'genes.bed', 117 -out => 'overlap' ); 118 119The object will store the programs STDERR output for you in the C<stderr()> 120attribute: 121 122 handle_bed_warning($bedtools_fac) if ($bedtools_fac->stderr =~ /Usage:/); 123 124For the commands 'fasta_from_bed' and 'mask_fasta_from_bed' STDOUT will also 125be captured in the C<stdout()> attribute by default and all other commands 126can be forced to capture program output in STDOUT by setting the -out 127filespec parameter to '-'. 128 129=head1 FEEDBACK 130 131=head2 Mailing Lists 132 133User feedback is an integral part of the evolution of this and other 134Bioperl modules. Send your comments and suggestions preferably to 135the Bioperl mailing list. Your participation is much appreciated. 136 137 bioperl-l@bioperl.org - General discussion 138 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 139 140=head2 Support 141 142Please direct usage questions or support issues to the mailing list: 143 144L<bioperl-l@bioperl.org> 145 146Rather than to the module maintainer directly. Many experienced and 147reponsive experts will be able look at the problem and quickly 148address it. Please include a thorough description of the problem 149with code and data examples if at all possible. 150 151=head2 Reporting Bugs 152 153Report bugs to the Bioperl bug tracking system to help us keep track 154of the bugs and their resolution. Bug reports can be submitted via 155the web: 156 157 http://redmine.open-bio.org/projects/bioperl/ 158 159=head1 AUTHOR - Dan Kortschak 160 161 Email dan.kortschak adelaide.edu.au 162 163=head1 CONTRIBUTORS 164 165Additional contributors names and emails here 166 167=head1 APPENDIX 168 169The rest of the documentation details each of the object methods. 170Internal methods are usually preceded with a _ 171 172=cut 173 174# Let the code begin... 175 176 177package Bio::Tools::Run::BEDTools; 178use strict; 179our $HAVE_IO_UNCOMPRESS; 180 181BEGIN { 182 eval 'require IO::Uncompress::Gunzip; $HAVE_IO_UNCOMPRESS = 1'; 183} 184 185use IPC::Run; 186 187# Object preamble - inherits from Bio::Root::Root 188 189use lib '../../..'; 190use Bio::Tools::Run::BEDTools::Config; 191use Bio::Tools::Run::WrapperBase; 192use Bio::Tools::Run::WrapperBase::CommandExts; 193use Bio::Tools::GuessSeqFormat; 194use Bio::SeqFeature::Generic; 195use Bio::SeqFeature::Collection; 196use Bio::SeqIO; 197use File::Sort qw( sort_file ); 198 199use base qw( Bio::Tools::Run::WrapperBase ); 200 201## BEDTools 202our $program_name = '*bedtools'; 203our $default_cmd = 'bam_to_bed'; 204 205# Note: Other globals imported from Bio::Tools::Run::BEDTools::Config 206our $qual_param = undef; 207our $use_dash = 'single'; 208our $join = ' '; 209 210our %strand_translate = ( 211 '+' => 1, 212 '-' => -1, 213 '.' => 0 214 ); 215 216=head2 new() 217 218 Title : new 219 Usage : my $obj = new Bio::Tools::Run::BEDTools(); 220 Function: Builds a new Bio::Tools::Run::BEDTools object 221 Returns : an instance of Bio::Tools::Run::BEDTools 222 Args : 223 224=cut 225 226sub new { 227 my ($class,@args) = @_; 228 unless (grep /command/, @args) { 229 push @args, '-command', $default_cmd; 230 } 231 my $self = $class->SUPER::new(@args); 232 foreach (keys %command_executables) { 233 $self->executables($_, $self->_find_executable($command_executables{$_})); 234 } 235 $self->want($self->_rearrange([qw(WANT)],@args)); 236 $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI 237 return $self; 238} 239 240=head2 run() 241 242 Title : run 243 Usage : $result = $bedtools_fac->run(%params); 244 Function: Run a BEDTools command. 245 Returns : Command results (file, IO object or Bio object) 246 Args : Dependent on filespec for command. 247 See $bedtools_fac->filespec and BEDTools Manual. 248 Also accepts -want => '(raw|format|<object_class>)' - see want(). 249 Note : gzipped inputs are allowed if IO::Uncompress::Gunzip 250 is available 251 252=cut 253 254sub run { 255 my $self = shift; 256 257 my ($ann, $bed, $bg, $bgv, $bgv1, $bgv2, $bam, $bedpe, $bedpe1, $bedpe2, $seq, $genome, $out); 258 259 if (!(@_ % 2)) { 260 my %args = @_; 261 if ((grep /^-\w+/, keys %args) == keys %args) { 262 ($ann, $bed, $bg, $bgv, $bgv1, $bgv2, $bam, $bedpe, $bedpe1, $bedpe2, $seq, $genome, $out) = 263 $self->_rearrange([qw( ANN BED BG BGV BGV1 BGV2 BAM 264 BEDPE BEDPE1 BEDPE2 265 SEQ GENOME OUT )], @_); 266 } else { 267 $self->throw("Badly formed named args: ".join(' ',@_)); 268 } 269 } else { 270 if (grep /^-\w+/, @_) { 271 $self->throw("Badly formed named args: ".join(' ',@_)); 272 } else { 273 $self->throw("Require named args."); 274 } 275 } 276 277 # Sanity checks 278 $self->executable || $self->throw("No executable!"); 279 my $cmd = $self->command if $self->can('command'); 280 281 for ($cmd) { 282 283=pod 284 285 Command <in> <out> 286 287 annotate bgv ann(s) #out 288 289=cut 290 m/^annotate$/ && do { 291 $bgv = $self->_uncompress($bgv); 292 $self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); 293 @$ann = map { 294 my $a = $_; 295 $a = $self->_uncompress($a); 296 $self->_validate_file_input(-ann => $a) || $self->throw("File '$a' not BED/GFF/VCF format."); 297 $a; 298 } @$ann; 299 last; 300 }; 301 302=pod 303 304 graph_union bg_files #out 305 306=cut 307 m/^graph_union$/ && do { 308 @$bg = map { 309 my $g = $_; 310 $g = $self->_uncompress($g); 311 $self->_validate_file_input(-bg => $g) || $self->throw("File '$a' not BedGraph format."); 312 $g; 313 } @$bg; 314 last; 315 }; 316 317=pod 318 319 fasta_from_bed seq bgv #out 320 321 mask_fasta_from_bed seq bgv #out 322 323=cut 324 m/fasta_from_bed$/ && do { 325 ($out // 0) eq '-' && 326 $self->throw("Cannot capture results in STDOUT with sequence commands."); 327 $seq = $self->_uncompress($seq); 328 $self->_validate_file_input(-seq => $seq) || $self->throw("File '$seq' not fasta format."); 329 $bgv = $self->_uncompress($bgv); 330 $self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); 331 last; 332 }; 333 334=pod 335 336 bam_to_bed bam #out 337 338=cut 339 340 m/^bam_to_bed$/ && do { 341 $bam = $self->_uncompress($bam); 342 $self->_validate_file_input(-bam => $bam) || $self->throw("File '$bam' not BAM format."); 343 last; 344 }; 345 346 347=pod 348 349 bed_to_IGV bgv #out 350 351 merge bgv #out 352 353 sort bgv #out 354 355 links bgv #out 356 357=cut 358 359 m/^(?:bed_to_IGV|merge|sort|links)$/ && do { 360 $bgv = $self->_uncompress($bgv); 361 $self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); 362 }; 363 364=pod 365 366 b12_to_b6 bed #out 367 368 overlap bed #out 369 370 group_by bed #out 371 372=cut 373 374 m/^(?:b12_to_b6|overlap|group_by)$/ && do { 375 $bed = $self->_uncompress($bed); 376 $self->_validate_file_input(-bed => $bed) || $self->throw("File '$bgv' not BED format."); 377 if ($cmd eq 'group_by') { 378 my $c =(my @c)= split(",",$self->columns); 379 my $o =(my @o)= split(",",$self->operations); 380 unless ($c > 0 && $o == $c) { 381 $self->throw("The command 'group_by' requires "."paired "x($o == $c)."'-columns' and '-operations' parameters"); 382 } 383 } 384 last; 385 }; 386 387=pod 388 389 bed_to_bam bgv #out 390 391 shuffle bgv genome #out 392 393 slop bgv genome #out 394 395 complement bgv genome #out 396 397=cut 398 399 m/^(?:bed_to_bam|shuffle|slop|complement)$/ && do { 400 $bgv = $self->_uncompress($bgv); 401 $self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); 402 $genome = $self->_uncompress($genome); 403 $self->_validate_file_input(-genome => $genome) || $self->throw("File '$genome' not genome format."); 404 if ($cmd eq 'slop') { 405 my $l = defined $self->add_to_left; 406 my $r = defined $self->add_to_right; 407 my $b = defined $self->add_bidirectional; 408 # I think I have a lisp 409 unless (($l && $r) || ($b xor ($l || $r))) { 410 $self->throw("The command 'slop' requires an unambiguous description of the slop you want"); 411 } 412 } 413 last; 414 }; 415 416=pod 417 418 genome_coverage bed genome #out 419 420=cut 421 422 m/^genome_coverage$/ && do { 423 $bed = $self->_uncompress($bed); 424 $self->_validate_file_input(-bed => $bed) || $self->throw("File '$bed' not BED format."); 425 $genome = $self->_uncompress($genome); 426 $self->_validate_file_input(-genome => $genome) || $self->throw("File '$genome' not genome format."); 427 my ($th, $tf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => '.bed' ); 428 $th->close; 429 sort_file({k => 1, I => $bed, o => $tf}); 430 $bed = $tf; 431 last; 432 }; 433 434=pod 435 436 window bgv1 bgv2 #out 437 438 closest bgv1 bgv2 #out 439 440 coverage bgv1 bgv2 #out 441 442 subtract bgv1 bgv2 #out 443 444=cut 445 446 m/^(?:window|closest|coverage|subtract)$/ && do { 447 $bgv1 = $self->_uncompress($bgv1); 448 $self->_validate_file_input(-bgv1 => $bgv1) || $self->throw("File '$bgv1' not BED/GFF/VCF format."); 449 $bgv2 = $self->_uncompress($bgv2); 450 $self->_validate_file_input(-bgv2 => $bgv2) || $self->throw("File '$bgv2' not BED/GFF/VCF format."); 451 }; 452 453=pod 454 455 pair_to_pair bedpe1 bedpe2 #out 456 457=cut 458 m/^pair_to_pair$/ && do { 459 $bedpe1 = $self->_uncompress($bedpe1); 460 $self->_validate_file_input(-bedpe1 => $bedpe1) || $self->throw("File '$bedpe1' not BEDPE format."); 461 $bedpe2 = $self->_uncompress($bedpe2); 462 $self->_validate_file_input(-bedpe2 => $bedpe2) || $self->throw("File '$bedpe2' not BEDPE format."); 463 last; 464 }; 465 466=pod 467 468 intersect bgv1|bam bgv2 #out 469 470=cut 471 m/^intersect$/ && do { 472 $bgv1 = $self->_uncompress($bgv1); 473 $bam = $self->_uncompress($bam); 474 ($bam && $self->_validate_file_input(-bam => $bam)) || ($bgv1 && $self->_validate_file_input(-bgv1 => $bgv1)) || 475 $self->throw("File in position 1. not correct format."); 476 $bgv2 = $self->_uncompress($bgv2); 477 $self->_validate_file_input(-bgv2 => $bgv2) || $self->throw("File '$bgv2' not BED/GFF/VCF format."); 478 last; 479 }; 480 481=pod 482 483 pair_to_bed bedpe|bam bgv #out 484 485 bgv* signifies any of BED, GFF or VCF. ann is a bgv. 486 487 NOTE: Replace 'bgv' with 'bed' unless $use_bgv is set. 488 489 490=cut 491 492 m/^pair_to_bed$/ && do { 493 $bedpe = $self->_uncompress($bedpe); 494 $bam = $self->_uncompress($bam); 495 ($bam && $self->_validate_file_input(-bam => $bam)) || ($bedpe && $self->_validate_file_input(-bedpe => $bedpe)) || 496 $self->throw("File in position 1. not correct format."); 497 $bgv = $self->_uncompress($bgv); 498 $self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bed' not BED/GFF/VCF format."); 499 last; 500 } 501 } 502 503 my %params = ( 504 '-ann' => $ann, 505 '-bam' => $bam, 506 '-bed' => $bed, 507 '-bgv' => $bgv, 508 '-bg' => $bg, 509 '-bgv1' => $bgv1, 510 '-bgv2' => $bgv2, 511 '-bedpe' => $bedpe, 512 '-bedpe1' => $bedpe1, 513 '-bedpe2' => $bedpe2, 514 '-seq' => $seq, 515 '-genome' => $genome 516 ); 517 map { 518 delete $params{$_} unless defined $params{$_} 519 } keys %params; 520 521 my $format = $self->_determine_format(\%params); 522 my $suffix = '.'.$format; 523 524 if (!defined $out) { 525 my ($outh, $outf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => $suffix ); 526 $outh->close; 527 $out = $outf; 528 } elsif ($out ne '-') { 529 $out .= $suffix; 530 } else { 531 undef $out; 532 } 533 $params{'-out'} = $out if defined $out; 534 535 $self->_run(%params); 536 537 $self->{'_result'}->{'file_name'} = $out // '-'; 538 $self->{'_result'}->{'format'} = $format; 539 $self->{'_result'}->{'file'} = defined $out ? Bio::Root::IO->new( -file => $out ) : undef; 540 541 return $self->result; 542} 543 544sub _uncompress { 545 my ($self, $file) = @_; 546 547 return if !defined $file; 548 549 return $file unless ($file =~ m/\.gz[^.]*$/); 550 551 return $file unless (-e $file && -r _); # other people will deal with this 552 553 unless ($HAVE_IO_UNCOMPRESS) { 554 croak( "IO::Uncompress::Gunzip not available, can't expand '$file'" ); 555 } 556 my ($tfh, $tf) = $self->io->tempfile( -dir => $self->tempdir() ); 557 my $z = IO::Uncompress::Gunzip->new($file); 558 while (my $block = $z->getline) { print $tfh $block } 559 close $tfh; 560 561 return $tf 562} 563 564=head2 want() 565 566 Title : want 567 Usage : $bowtiefac->want( $class ) 568 Function: make factory return $class, or 'raw' results in file 569 or 'format' for result format 570 All commands can return Bio::Root::IO 571 commands returning: can return object: 572 - BED or BEDPE - Bio::SeqFeature::Collection 573 - sequence - Bio::SeqIO 574 Returns : return wanted type 575 Args : [optional] string indicating class or raw of wanted result 576 577=cut 578 579sub want { 580 my $self = shift; 581 return $self->{'_want'} = shift if @_; 582 return $self->{'_want'}; 583} 584 585=head2 result() 586 587 Title : result 588 Usage : $bedtoolsfac->result( [-want => $type|$format] ) 589 Function: return result in wanted format 590 Returns : results 591 Args : [optional] hashref of wanted type 592 Note : -want arg does not persist between result() call when 593 specified in result(), for persistence, use want() 594 595=cut 596 597sub result { 598 my ($self, @args) = @_; 599 600 my $want = $self->_rearrange([qw(WANT)],@args); 601 $want ||= $self->want; 602 my $cmd = $self->command if $self->can('command'); 603 my $format = $self->{'_result'}->{'format'}; 604 my $file_name = $self->{'_result'}->{'file_name'}; 605 606 return $self->{'_result'}->{'format'} if (defined $want && $want eq 'format'); 607 return $self->{'_result'}->{'file_name'} if (!$want || $want eq 'raw'); 608 return $self->{'_result'}->{'file'} if ($want =~ m/^Bio::Root::IO/); # this will be undef if -out eq '-' 609 610 for ($format) { # these are dissected more finely than seems resonable to allow easy extension 611 m/bed/ && do { 612 for ($want) { 613 m/Bio::SeqFeature::Collection/ && do { 614 unless (defined $self->{'_result'}->{'object'} && 615 ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqFeature::Collection/) { 616 $self->{'_result'}->{'object'} = $self->_read_bed; 617 } 618 return $self->{'_result'}->{'object'}; 619 }; 620 $self->warn("Cannot make '$_' for $format."); 621 return; 622 } 623 last; 624 }; 625 m/bedpe/ && do { 626 for ($want) { 627 m/Bio::SeqFeature::Collection/ && do { 628 unless (defined $self->{'_result'}->{'object'} && 629 ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqFeature::Collection/) { 630 $self->{'_result'}->{'object'} = $self->_read_bedpe; 631 } 632 return $self->{'_result'}->{'object'}; 633 }; 634 $self->warn("Cannot make '$_' for $format."); 635 return; 636 } 637 last; 638 }; 639 m/bam/ && do { 640 $self->warn("Cannot make '$_' for $format."); 641 return; 642 }; 643 m/^(?:fasta|raw)$/ && do { 644 for ($want) { 645 m/Bio::SeqIO/ && do { 646 $file_name eq '-' && $self->throw("Cannot make a SeqIO object from STDOUT."); 647 unless (defined $self->{'_result'}->{'object'} && 648 ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqIO/) { 649 $self->{'_result'}->{'object'} = 650 Bio::SeqIO->new(-file => $file_name, 651 -format => $format); 652 } 653 return $self->{'_result'}->{'object'}; 654 }; 655 $self->warn("Cannot make '$_' for $format."); 656 return; 657 } 658 last; 659 }; 660 m/tab/ && do { 661 $self->warn("Cannot make '$_' for $format."); 662 return; 663 }; 664 m/igv/ && do { 665 $self->warn("Cannot make '$_' for $format."); 666 return; 667 }; 668 m/html/ && do { 669 $self->warn("Cannot make '$_' for $format."); 670 return; 671 }; 672 do { 673 $self->warn("Result format '$_' not recognised - have you called run() yet?"); 674 } 675 } 676} 677 678=head2 _determine_format() 679 680 Title : _determine_format( $has_run ) 681 Usage : $bedtools-fac->_determine_format 682 Function: determine the format of output for current options 683 Returns : format of bowtie output 684 Args : [optional] boolean to indicate result exists 685 686=cut 687 688sub _determine_format { 689 my ($self, $params) = @_; 690 691 my $cmd = $self->command if $self->can('command'); 692 my $format = $format_lookup{$cmd}; 693 694 #special cases - dependent on switches and files 695 for ($cmd) { 696 m/^intersect$/ && do { 697 return 'bed' if !defined $$params{'-bam'} || $self->write_bed; 698 return 'bam'; 699 }; 700 m/^pair_to_bed$/ && do { 701 return 'bedpe' if !defined $$params{'-bam'} || $self->write_bedpe; 702 return 'bam'; 703 }; 704 m/^fasta_from_bed$/ && do { 705 return $self->output_tab_format ? 'tab' : 'fasta'; 706 } 707 } 708 709 return $format; 710} 711 712=head2 _read_bed() 713 714 Title : _read_bed() 715 Usage : $bedtools_fac->_read_bed 716 Function: return a Bio::SeqFeature::Collection object from a BED file 717 Returns : Bio::SeqFeature::Collection 718 Args : 719 720=cut 721 722sub _read_bed { 723 my ($self) = shift; 724 725 my @features; 726 727 if ($self->{'_result'}->{'file_name'} ne '-') { 728 my $in = $self->{'_result'}->{'file'}; 729 while (my $feature = $in->_readline) { 730 chomp $feature; 731 push @features, _read_bed_line($feature); 732 } 733 } else { 734 for my $feature (split("\cJ", $self->stdout)) { 735 push @features, _read_bed_line($feature); 736 } 737 } 738 739 my $collection = Bio::SeqFeature::Collection->new; 740 $collection->add_features(\@features); 741 742 return $collection; 743} 744 745sub _read_bed_line { 746 my $feature = shift; 747 748 my ($chr, $start, $end, $name, $score, $strand, 749 $thick_start, $thick_end, $item_RGB, $block_count, $block_size, $block_start) = 750 split("\cI",$feature); 751 $strand ||= '.'; # BED3 doesn't have strand data - for 'merge' and 'complement' 752 753 return Bio::SeqFeature::Generic->new( -seq_id => $chr, 754 -primary => $name, 755 -start => $start, 756 -end => $end, 757 -strand => $strand_translate{$strand}, 758 -score => $score, 759 -tag => { thick_start => $thick_start, 760 thick_end => $thick_end, 761 item_RGB => $item_RGB, 762 block_count => $block_count, 763 block_size => $block_size, 764 block_start => $block_size } 765 ); 766} 767 768=head2 _read_bedpe() 769 770 Title : _read_bedpe() 771 Usage : $bedtools_fac->_read_bedpe 772 Function: return a Bio::SeqFeature::Collection object from a BEDPE file 773 Returns : Bio::SeqFeature::Collection 774 Args : 775 776=cut 777 778sub _read_bedpe { 779 my ($self) = shift; 780 781 my @features; 782 783 if ($self->{'_result'}->{'file_name'} ne '-') { 784 my $in = $self->{'_result'}->{'file'}; 785 while (my $feature = $in->_readline) { 786 chomp $feature; 787 push @features, _read_bedpe_line($feature); 788 } 789 } else { 790 for my $feature (split("\cJ", $self->stdout)) { 791 push @features, _read_bedpe_line($feature); 792 } 793 } 794 795 my $collection = Bio::SeqFeature::Collection->new; 796 $collection->add_features(\@features); 797 798 return $collection; 799} 800 801sub _read_bedpe_line { 802 my $feature = shift; 803 804 my ($chr1, $start1, $end1, $chr2, $start2, $end2, $name, $score, $strand1, $strand2, @add) = 805 split("\cI",$feature); 806 $strand1 ||= '.'; 807 $strand2 ||= '.'; 808 809 return Bio::SeqFeature::FeaturePair->new( -primary => $name, 810 -seq_id => $chr1, 811 -start => $start1, 812 -end => $end1, 813 -strand => $strand_translate{$strand1}, 814 815 -hprimary_tag => $name, 816 -hseqname => $chr2, 817 -hstart => $start2, 818 -hend => $end2, 819 -hstrand => $strand_translate{$strand2}, 820 821 -score => $score 822 ); 823} 824 825=head2 _validate_file_input() 826 827 Title : _validate_file_input 828 Usage : $bedtools_fac->_validate_file_input( -type => $file ) 829 Function: validate file type for file spec 830 Returns : file type if valid type for file spec 831 Args : hash of filespec => file_name 832 833=cut 834 835sub _validate_file_input { 836 my ($self, @args) = @_; 837 my (%args); 838 if (grep (/^-/, @args) && (@args > 1)) { # named parms 839 $self->throw("Wrong number of args - requires one named arg") if (@args > 2); 840 s/^-// for @args; 841 %args = @args; 842 } else { 843 $self->throw("Must provide named filespec"); 844 } 845 846 for (keys %args) { 847 m/bam/ && do { 848 return 'bam'; 849 }; 850 do { 851 return unless ( -e $args{$_} && -r _ ); 852 my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$args{$_}); 853 return $guesser->guess if grep {$guesser->guess =~ m/$_/} @{$accepted_types{$_}}; 854 } 855 } 856 return; 857} 858 859=head2 version() 860 861 Title : version 862 Usage : $version = $bedtools_fac->version() 863 Function: Returns the program version (if available) 864 Returns : string representing location and version of the program 865 866=cut 867 868sub version{ 869 my ($self) = @_; 870 871 my $cmd = $self->command if $self->can('command'); 872 873 defined $cmd or $self->throw("No command defined - cannot determine program executable"); 874 875 # new bahaviour for some BEDTools executables - breaks previous approach to getting version 876 # $dummy can be any non-recognised parameter - '-version' works for most 877 my $dummy = '-version'; 878 $dummy = '-examples' if $cmd =~ /graph_union/; 879 880 my ($in, $out, $err); 881 my $dum; 882 $in = \$dum; 883 $out = \$self->{'stdout'}; 884 $err = \$self->{'stderr'}; 885 886 # Get program executable 887 my $exe = $self->executable; 888 889 my @ipc_args = ( $exe, $dummy ); 890 891 eval { 892 IPC::Run::run(\@ipc_args, $in, $out, $err) or 893 die ("There was a problem running $exe : $!"); 894 }; 895 # We don't bother trying to catch this: version is returned as an illegal file seek 896 897 my @details = split("\n",$self->stderr); 898 (my $version) = grep /^Program: .*$/, @details; 899 $version =~ s/^Program: //; 900 901 return $version; 902} 903 904sub available_commands { shift->available_parameters('commands') }; 905 906sub filespec { shift->available_parameters('filespec') }; 907 9081; 909