1#
2# BioPerl module for Bio::Tools::Run::BEDTools
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Dan Kortschak <dan.kortschak@adelaide.edu.au>
7#
8# Copyright Dan Kortschak
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::Tools::Run::BEDTools - Run wrapper for the BEDTools suite of programs *BETA*
17
18=head1 SYNOPSIS
19
20 # use a BEDTools program
21 $bedtools_fac = Bio::Tools::Run::BEDTools->new( -command => 'subtract' );
22 $result_file = $bedtools_fac->run( -bed1 => 'genes.bed', -bed2 => 'mask.bed' );
23
24 # if IO::Uncompress::Gunzip is available...
25 $result_file = $bedtools_fac->run( -bed1 => 'genes.bed.gz', -bed2 => 'mask.bed.gz' );
26
27 # be more strict
28 $bedtools_fac->set_parameters( -strandedness => 1 );
29
30 # and even more...
31 $bedtools_fac->set_parameters( -minimum_overlap => 1e-6 );
32
33 # create a Bio::SeqFeature::Collection object
34 $features = $bedtools_fac->result( -want => 'Bio::SeqFeature::Collection' );
35
36=head1 DEPRECATION WARNING
37
38Most executables from BEDTools v>=2.10.1 can read GFF and VCF formats
39in addition to BED format. This requires the use of a new input file param,
40shown in the following documentation, '-bgv', in place of '-bed' for the
41executables that can do this.
42
43This behaviour breaks existing scripts.
44
45=head1 DESCRIPTION
46
47This module provides a wrapper interface for Aaron R. Quinlan and Ira M. Hall's
48utilities C<BEDTools> that allow for (among other things):
49
50=over
51
52=item * Intersecting two BED files in search of overlapping features.
53
54=item * Merging overlapping features.
55
56=item * Screening for paired-end (PE) overlaps between PE sequences and existing genomic features.
57
58=item * Calculating the depth and breadth of sequence coverage across defined "windows" in a genome.
59
60=back
61
62(see L<http://code.google.com/p/bedtools/> for manuals and downloads).
63
64
65=head1 OPTIONS
66
67C<BEDTools> is a suite of 17 commandline executable. This module attempts to
68provide and options comprehensively. You can browse the choices like so:
69
70 $bedtools_fac = Bio::Tools::Run::BEDTools->new;
71
72 # all bowtie commands
73 @all_commands = $bedtools_fac->available_parameters('commands');
74 @all_commands = $bedtools_fac->available_commands; # alias
75
76 # just for default command ('bam_to_bed')
77 @btb_params = $bedtools_fac->available_parameters('params');
78 @btb_switches = $bedtools_fac->available_parameters('switches');
79 @btb_all_options = $bedtools_fac->available_parameters();
80
81Reasonably mnemonic names have been assigned to the single-letter
82command line options. These are the names returned by
83C<available_parameters>, and can be used in the factory constructor
84like typical BioPerl named parameters.
85
86As a number of options are mutually exclusive, and the interpretation of
87intent is based on last-pass option reaching bowtie with potentially unpredicted
88results. This module will prevent inconsistent switches and parameters
89from being passed.
90
91See L<http://code.google.com/p/bedtools/> for details of BEDTools options.
92
93=head1 FILES
94
95When a command requires filenames, these are provided to the C<run> method, not
96the constructor (C<new()>). To see the set of files required by a command, use
97C<available_parameters('filespec')> or the alias C<filespec()>:
98
99  $bedtools_fac = Bio::Tools::Run::BEDTools->new( -command => 'pair_to_bed' );
100  @filespec = $bedtools_fac->filespec;
101
102This example returns the following array:
103
104 #bedpe
105 #bam
106 bed
107 #out
108
109This indicates that the bed (C<BEDTools> BED format) file MUST be
110specified, and that the out, bedpe (C<BEDTools> BEDPE format) and bam
111(C<SAM> binary format) file MAY be specified (Note that in this case you
112MUST provide ONE of bedpe OR bam, the module at this stage does not allow
113this information to be queried). Use these in the C<run> call like so:
114
115 $bedtools_fac->run( -bedpe => 'paired.bedpe',
116                     -bgv => 'genes.bed',
117                     -out => 'overlap' );
118
119The object will store the programs STDERR output for you in the C<stderr()>
120attribute:
121
122 handle_bed_warning($bedtools_fac) if ($bedtools_fac->stderr =~ /Usage:/);
123
124For the commands 'fasta_from_bed' and 'mask_fasta_from_bed' STDOUT will also
125be captured in the C<stdout()> attribute by default and all other commands
126can be forced to capture program output in STDOUT by setting the -out
127filespec parameter to '-'.
128
129=head1 FEEDBACK
130
131=head2 Mailing Lists
132
133User feedback is an integral part of the evolution of this and other
134Bioperl modules. Send your comments and suggestions preferably to
135the Bioperl mailing list.  Your participation is much appreciated.
136
137  bioperl-l@bioperl.org                  - General discussion
138  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
139
140=head2 Support
141
142Please direct usage questions or support issues to the mailing list:
143
144L<bioperl-l@bioperl.org>
145
146Rather than to the module maintainer directly. Many experienced and
147reponsive experts will be able look at the problem and quickly
148address it. Please include a thorough description of the problem
149with code and data examples if at all possible.
150
151=head2 Reporting Bugs
152
153Report bugs to the Bioperl bug tracking system to help us keep track
154of the bugs and their resolution. Bug reports can be submitted via
155the web:
156
157  http://redmine.open-bio.org/projects/bioperl/
158
159=head1 AUTHOR - Dan Kortschak
160
161 Email dan.kortschak adelaide.edu.au
162
163=head1 CONTRIBUTORS
164
165Additional contributors names and emails here
166
167=head1 APPENDIX
168
169The rest of the documentation details each of the object methods.
170Internal methods are usually preceded with a _
171
172=cut
173
174# Let the code begin...
175
176
177package Bio::Tools::Run::BEDTools;
178use strict;
179our $HAVE_IO_UNCOMPRESS;
180
181BEGIN {
182    eval 'require IO::Uncompress::Gunzip; $HAVE_IO_UNCOMPRESS = 1';
183}
184
185use IPC::Run;
186
187# Object preamble - inherits from Bio::Root::Root
188
189use lib '../../..';
190use Bio::Tools::Run::BEDTools::Config;
191use Bio::Tools::Run::WrapperBase;
192use Bio::Tools::Run::WrapperBase::CommandExts;
193use Bio::Tools::GuessSeqFormat;
194use Bio::SeqFeature::Generic;
195use Bio::SeqFeature::Collection;
196use Bio::SeqIO;
197use File::Sort qw( sort_file );
198
199use base qw( Bio::Tools::Run::WrapperBase );
200
201## BEDTools
202our $program_name = '*bedtools';
203our $default_cmd = 'bam_to_bed';
204
205# Note: Other globals imported from Bio::Tools::Run::BEDTools::Config
206our $qual_param = undef;
207our $use_dash = 'single';
208our $join = ' ';
209
210our %strand_translate = (
211	'+' => 1,
212	'-' => -1,
213	'.' => 0
214	);
215
216=head2 new()
217
218 Title   : new
219 Usage   : my $obj = new Bio::Tools::Run::BEDTools();
220 Function: Builds a new Bio::Tools::Run::BEDTools object
221 Returns : an instance of Bio::Tools::Run::BEDTools
222 Args    :
223
224=cut
225
226sub new {
227	my ($class,@args) = @_;
228	unless (grep /command/, @args) {
229		push @args, '-command', $default_cmd;
230	}
231	my $self = $class->SUPER::new(@args);
232	foreach (keys %command_executables) {
233		$self->executables($_, $self->_find_executable($command_executables{$_}));
234	}
235	$self->want($self->_rearrange([qw(WANT)],@args));
236	$self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI
237	return $self;
238}
239
240=head2 run()
241
242 Title   : run
243 Usage   : $result = $bedtools_fac->run(%params);
244 Function: Run a BEDTools command.
245 Returns : Command results (file, IO object or Bio object)
246 Args    : Dependent on filespec for command.
247           See $bedtools_fac->filespec and BEDTools Manual.
248           Also accepts -want => '(raw|format|<object_class>)' - see want().
249 Note    : gzipped inputs are allowed if IO::Uncompress::Gunzip
250           is available
251
252=cut
253
254sub run {
255	my $self = shift;
256
257	my ($ann, $bed, $bg, $bgv, $bgv1, $bgv2, $bam, $bedpe, $bedpe1, $bedpe2, $seq, $genome, $out);
258
259	if (!(@_ % 2)) {
260		my %args = @_;
261		if ((grep /^-\w+/, keys %args) == keys %args) {
262			($ann, $bed, $bg, $bgv, $bgv1, $bgv2, $bam, $bedpe, $bedpe1, $bedpe2, $seq, $genome, $out) =
263				$self->_rearrange([qw( ANN BED BG BGV BGV1 BGV2 BAM
264				                       BEDPE BEDPE1 BEDPE2
265				                       SEQ GENOME OUT )], @_);
266		} else {
267			$self->throw("Badly formed named args: ".join(' ',@_));
268		}
269	} else {
270		if (grep /^-\w+/, @_) {
271			$self->throw("Badly formed named args: ".join(' ',@_));
272		} else {
273			$self->throw("Require named args.");
274		}
275	}
276
277	# Sanity checks
278	$self->executable || $self->throw("No executable!");
279	my $cmd = $self->command if $self->can('command');
280
281	for ($cmd) {
282
283=pod
284
285           Command              	<in>			<out>
286
287           annotate             bgv		ann(s)		#out
288
289=cut
290		m/^annotate$/ && do {
291			$bgv = $self->_uncompress($bgv);
292			$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format.");
293			@$ann = map {
294				my $a = $_;
295	            $a = $self->_uncompress($a);
296				$self->_validate_file_input(-ann => $a) || $self->throw("File '$a' not BED/GFF/VCF format.");
297				$a;
298			} @$ann;
299			last;
300		};
301
302=pod
303
304           graph_union	        bg_files			#out
305
306=cut
307		m/^graph_union$/ && do {
308			@$bg = map {
309				my $g = $_;
310	            $g = $self->_uncompress($g);
311				$self->_validate_file_input(-bg => $g) || $self->throw("File '$a' not BedGraph format.");
312				$g;
313			} @$bg;
314			last;
315		};
316
317=pod
318
319           fasta_from_bed       seq		bgv		#out
320
321           mask_fasta_from_bed  seq		bgv		#out
322
323=cut
324		m/fasta_from_bed$/ && do {
325			($out // 0) eq '-' &&
326				$self->throw("Cannot capture results in STDOUT with sequence commands.");
327			$seq = $self->_uncompress($seq);
328			$self->_validate_file_input(-seq => $seq) || $self->throw("File '$seq' not fasta format.");
329			$bgv = $self->_uncompress($bgv);
330			$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format.");
331			last;
332		};
333
334=pod
335
336           bam_to_bed           bam 				#out
337
338=cut
339
340		m/^bam_to_bed$/ && do {
341			$bam = $self->_uncompress($bam);
342			$self->_validate_file_input(-bam => $bam) || $self->throw("File '$bam' not BAM format.");
343			last;
344		};
345
346
347=pod
348
349           bed_to_IGV           bgv 				#out
350
351           merge                bgv 				#out
352
353           sort                 bgv 				#out
354
355           links                bgv 				#out
356
357=cut
358
359		m/^(?:bed_to_IGV|merge|sort|links)$/ && do {
360			$bgv = $self->_uncompress($bgv);
361			$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format.");
362		};
363
364=pod
365
366           b12_to_b6            bed 				#out
367
368           overlap              bed 				#out
369
370           group_by             bed 				#out
371
372=cut
373
374		m/^(?:b12_to_b6|overlap|group_by)$/ && do {
375			$bed = $self->_uncompress($bed);
376			$self->_validate_file_input(-bed => $bed) || $self->throw("File '$bgv' not BED format.");
377			if ($cmd eq 'group_by') {
378				my $c =(my @c)= split(",",$self->columns);
379				my $o =(my @o)= split(",",$self->operations);
380				unless ($c > 0 && $o == $c) {
381					$self->throw("The command 'group_by' requires "."paired "x($o == $c)."'-columns' and '-operations' parameters");
382				}
383			}
384			last;
385		};
386
387=pod
388
389           bed_to_bam           bgv 				#out
390
391           shuffle              bgv 		genome		#out
392
393           slop                 bgv 		genome		#out
394
395           complement           bgv 		genome		#out
396
397=cut
398
399		m/^(?:bed_to_bam|shuffle|slop|complement)$/ && do {
400			$bgv = $self->_uncompress($bgv);
401			$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format.");
402			$genome = $self->_uncompress($genome);
403			$self->_validate_file_input(-genome => $genome) || $self->throw("File '$genome' not genome format.");
404			if ($cmd eq 'slop') {
405				my $l = defined $self->add_to_left;
406				my $r = defined $self->add_to_right;
407				my $b = defined $self->add_bidirectional;
408				# I think I have a lisp
409				unless (($l && $r) || ($b xor ($l || $r))) {
410					$self->throw("The command 'slop' requires an unambiguous description of the slop you want");
411				}
412			}
413			last;
414		};
415
416=pod
417
418           genome_coverage      bed 		genome		#out
419
420=cut
421
422		m/^genome_coverage$/ && do {
423			$bed = $self->_uncompress($bed);
424			$self->_validate_file_input(-bed => $bed) || $self->throw("File '$bed' not BED format.");
425			$genome = $self->_uncompress($genome);
426			$self->_validate_file_input(-genome => $genome) || $self->throw("File '$genome' not genome format.");
427			my ($th, $tf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => '.bed' );
428			$th->close;
429			sort_file({k => 1, I => $bed, o => $tf});
430			$bed = $tf;
431			last;
432		};
433
434=pod
435
436           window               bgv1		bgv2		#out
437
438           closest              bgv1		bgv2		#out
439
440           coverage             bgv1		bgv2		#out
441
442           subtract             bgv1		bgv2		#out
443
444=cut
445
446		m/^(?:window|closest|coverage|subtract)$/ && do {
447			$bgv1 = $self->_uncompress($bgv1);
448			$self->_validate_file_input(-bgv1 => $bgv1) || $self->throw("File '$bgv1' not BED/GFF/VCF format.");
449			$bgv2 = $self->_uncompress($bgv2);
450			$self->_validate_file_input(-bgv2 => $bgv2) || $self->throw("File '$bgv2' not BED/GFF/VCF format.");
451		};
452
453=pod
454
455           pair_to_pair         bedpe1		bedpe2		#out
456
457=cut
458		m/^pair_to_pair$/ && do {
459			$bedpe1 = $self->_uncompress($bedpe1);
460			$self->_validate_file_input(-bedpe1 => $bedpe1) || $self->throw("File '$bedpe1' not BEDPE format.");
461			$bedpe2 = $self->_uncompress($bedpe2);
462			$self->_validate_file_input(-bedpe2 => $bedpe2) || $self->throw("File '$bedpe2' not BEDPE format.");
463			last;
464		};
465
466=pod
467
468           intersect            bgv1|bam	bgv2		#out
469
470=cut
471		m/^intersect$/ && do {
472			$bgv1 = $self->_uncompress($bgv1);
473			$bam = $self->_uncompress($bam);
474			($bam && $self->_validate_file_input(-bam => $bam)) || ($bgv1 && $self->_validate_file_input(-bgv1 => $bgv1)) ||
475				$self->throw("File in position 1. not correct format.");
476			$bgv2 = $self->_uncompress($bgv2);
477			$self->_validate_file_input(-bgv2 => $bgv2) || $self->throw("File '$bgv2' not BED/GFF/VCF format.");
478			last;
479		};
480
481=pod
482
483           pair_to_bed          bedpe|bam	bgv		#out
484
485           bgv* signifies any of BED, GFF or VCF. ann is a bgv.
486
487           NOTE: Replace 'bgv' with 'bed' unless $use_bgv is set.
488
489
490=cut
491
492		m/^pair_to_bed$/ && do {
493			$bedpe = $self->_uncompress($bedpe);
494			$bam = $self->_uncompress($bam);
495			($bam && $self->_validate_file_input(-bam => $bam)) || ($bedpe && $self->_validate_file_input(-bedpe => $bedpe)) ||
496				$self->throw("File in position 1. not correct format.");
497			$bgv = $self->_uncompress($bgv);
498			$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bed' not BED/GFF/VCF format.");
499			last;
500		}
501	}
502
503	my %params = (
504		'-ann'        => $ann,
505		'-bam'        => $bam,
506		'-bed'        => $bed,
507		'-bgv'        => $bgv,
508		'-bg'         => $bg,
509		'-bgv1'       => $bgv1,
510		'-bgv2'       => $bgv2,
511		'-bedpe'      => $bedpe,
512		'-bedpe1'     => $bedpe1,
513		'-bedpe2'     => $bedpe2,
514		'-seq'        => $seq,
515		'-genome'     => $genome
516	);
517	map {
518		delete $params{$_} unless defined $params{$_}
519	} keys %params;
520
521	my $format = $self->_determine_format(\%params);
522	my $suffix = '.'.$format;
523
524	if (!defined $out) {
525		my ($outh, $outf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => $suffix );
526		$outh->close;
527		$out = $outf;
528	} elsif ($out ne '-') {
529		$out .= $suffix;
530	} else {
531		undef $out;
532	}
533	$params{'-out'} = $out if defined $out;
534
535	$self->_run(%params);
536
537	$self->{'_result'}->{'file_name'} = $out // '-';
538	$self->{'_result'}->{'format'} = $format;
539	$self->{'_result'}->{'file'} = defined $out ? Bio::Root::IO->new( -file => $out ) : undef;
540
541	return $self->result;
542}
543
544sub _uncompress {
545        my ($self, $file) = @_;
546
547	return if !defined $file;
548
549        return $file unless ($file =~ m/\.gz[^.]*$/);
550
551	return $file unless (-e $file && -r _); # other people will deal with this
552
553        unless ($HAVE_IO_UNCOMPRESS) {
554                croak( "IO::Uncompress::Gunzip not available, can't expand '$file'" );
555        }
556        my ($tfh, $tf) = $self->io->tempfile( -dir => $self->tempdir() );
557        my $z = IO::Uncompress::Gunzip->new($file);
558        while (my $block = $z->getline) { print $tfh $block }
559        close $tfh;
560
561        return $tf
562}
563
564=head2 want()
565
566 Title   : want
567 Usage   : $bowtiefac->want( $class )
568 Function: make factory return $class, or 'raw' results in file
569           or 'format' for result format
570           All commands can return Bio::Root::IO
571           commands returning:       can return object:
572           - BED or BEDPE            - Bio::SeqFeature::Collection
573           - sequence                - Bio::SeqIO
574 Returns : return wanted type
575 Args    : [optional] string indicating class or raw of wanted result
576
577=cut
578
579sub want {
580	my $self = shift;
581	return $self->{'_want'} = shift if @_;
582	return $self->{'_want'};
583}
584
585=head2 result()
586
587 Title   : result
588 Usage   : $bedtoolsfac->result( [-want => $type|$format] )
589 Function: return result in wanted format
590 Returns : results
591 Args    : [optional] hashref of wanted type
592 Note    : -want arg does not persist between result() call when
593           specified in result(), for persistence, use want()
594
595=cut
596
597sub result {
598	my ($self, @args) = @_;
599
600	my $want = $self->_rearrange([qw(WANT)],@args);
601	$want ||= $self->want;
602	my $cmd = $self->command if $self->can('command');
603	my $format = $self->{'_result'}->{'format'};
604	my $file_name = $self->{'_result'}->{'file_name'};
605
606	return $self->{'_result'}->{'format'} if (defined $want && $want eq 'format');
607	return $self->{'_result'}->{'file_name'} if (!$want || $want eq 'raw');
608	return $self->{'_result'}->{'file'} if ($want =~ m/^Bio::Root::IO/); # this will be undef if -out eq '-'
609
610	for ($format) { # these are dissected more finely than seems resonable to allow easy extension
611		m/bed/ && do {
612			for ($want) {
613				m/Bio::SeqFeature::Collection/ && do {
614					unless (defined $self->{'_result'}->{'object'} &&
615						ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqFeature::Collection/) {
616							$self->{'_result'}->{'object'} = $self->_read_bed;
617					}
618					return $self->{'_result'}->{'object'};
619				};
620				$self->warn("Cannot make '$_' for $format.");
621				return;
622			}
623			last;
624		};
625		m/bedpe/ && do {
626			for ($want) {
627				m/Bio::SeqFeature::Collection/ && do {
628					unless (defined $self->{'_result'}->{'object'} &&
629						ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqFeature::Collection/) {
630							$self->{'_result'}->{'object'} = $self->_read_bedpe;
631					}
632					return $self->{'_result'}->{'object'};
633				};
634				$self->warn("Cannot make '$_' for $format.");
635				return;
636			}
637			last;
638		};
639		m/bam/ && do {
640			$self->warn("Cannot make '$_' for $format.");
641			return;
642		};
643		m/^(?:fasta|raw)$/ && do {
644			for ($want) {
645				m/Bio::SeqIO/ && do {
646					$file_name eq '-' && $self->throw("Cannot make a SeqIO object from STDOUT.");
647					unless (defined $self->{'_result'}->{'object'} &&
648						ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqIO/) {
649							$self->{'_result'}->{'object'} =
650								Bio::SeqIO->new(-file => $file_name,
651								                -format => $format);
652					}
653					return $self->{'_result'}->{'object'};
654				};
655				$self->warn("Cannot make '$_' for $format.");
656				return;
657			}
658			last;
659		};
660		m/tab/ && do {
661			$self->warn("Cannot make '$_' for $format.");
662			return;
663		};
664		m/igv/ && do {
665			$self->warn("Cannot make '$_' for $format.");
666			return;
667		};
668		m/html/ && do {
669			$self->warn("Cannot make '$_' for $format.");
670			return;
671		};
672		do {
673			$self->warn("Result format '$_' not recognised - have you called run() yet?");
674		}
675	}
676}
677
678=head2 _determine_format()
679
680 Title   : _determine_format( $has_run )
681 Usage   : $bedtools-fac->_determine_format
682 Function: determine the format of output for current options
683 Returns : format of bowtie output
684 Args    : [optional] boolean to indicate result exists
685
686=cut
687
688sub _determine_format {
689        my ($self, $params) = @_;
690
691	my $cmd = $self->command if $self->can('command');
692	my $format = $format_lookup{$cmd};
693
694	#special cases - dependent on switches and files
695	for ($cmd) {
696		m/^intersect$/ && do {
697			return 'bed' if !defined $$params{'-bam'} || $self->write_bed;
698			return 'bam';
699		};
700		m/^pair_to_bed$/ && do {
701			return 'bedpe' if !defined $$params{'-bam'} || $self->write_bedpe;
702			return 'bam';
703		};
704		m/^fasta_from_bed$/ && do {
705			return $self->output_tab_format ? 'tab' : 'fasta';
706		}
707	}
708
709	return $format;
710}
711
712=head2 _read_bed()
713
714 Title   : _read_bed()
715 Usage   : $bedtools_fac->_read_bed
716 Function: return a Bio::SeqFeature::Collection object from a BED file
717 Returns : Bio::SeqFeature::Collection
718 Args    :
719
720=cut
721
722sub _read_bed {
723	my ($self) = shift;
724
725	my @features;
726
727	if ($self->{'_result'}->{'file_name'} ne '-') {
728		my $in = $self->{'_result'}->{'file'};
729		while (my $feature = $in->_readline) {
730			chomp $feature;
731			push @features, _read_bed_line($feature);
732		}
733	} else {
734		for my $feature (split("\cJ", $self->stdout)) {
735			push @features, _read_bed_line($feature);
736		}
737	}
738
739	my $collection = Bio::SeqFeature::Collection->new;
740	$collection->add_features(\@features);
741
742	return $collection;
743}
744
745sub _read_bed_line {
746	my $feature = shift;
747
748	my ($chr, $start, $end, $name, $score, $strand,
749	    $thick_start, $thick_end, $item_RGB, $block_count, $block_size, $block_start) =
750		split("\cI",$feature);
751	$strand ||= '.'; # BED3 doesn't have strand data - for 'merge' and 'complement'
752
753	return Bio::SeqFeature::Generic->new( -seq_id  => $chr,
754	                                      -primary => $name,
755	                                      -start   => $start,
756	                                      -end     => $end,
757	                                      -strand  => $strand_translate{$strand},
758	                                      -score   => $score,
759	                                      -tag     => { thick_start => $thick_start,
760	                                                    thick_end   => $thick_end,
761	                                                    item_RGB    => $item_RGB,
762	                                                    block_count => $block_count,
763	                                                    block_size  => $block_size,
764	                                                    block_start => $block_size }
765	                                    );
766}
767
768=head2 _read_bedpe()
769
770 Title   : _read_bedpe()
771 Usage   : $bedtools_fac->_read_bedpe
772 Function: return a Bio::SeqFeature::Collection object from a BEDPE file
773 Returns : Bio::SeqFeature::Collection
774 Args    :
775
776=cut
777
778sub _read_bedpe {
779	my ($self) = shift;
780
781	my @features;
782
783	if ($self->{'_result'}->{'file_name'} ne '-') {
784		my $in = $self->{'_result'}->{'file'};
785		while (my $feature = $in->_readline) {
786			chomp $feature;
787			push @features, _read_bedpe_line($feature);
788		}
789	} else {
790		for my $feature (split("\cJ", $self->stdout)) {
791			push @features, _read_bedpe_line($feature);
792		}
793	}
794
795	my $collection = Bio::SeqFeature::Collection->new;
796	$collection->add_features(\@features);
797
798	return $collection;
799}
800
801sub _read_bedpe_line {
802	my $feature = shift;
803
804	my ($chr1, $start1, $end1, $chr2, $start2, $end2, $name, $score, $strand1, $strand2, @add) =
805		split("\cI",$feature);
806	$strand1 ||= '.';
807	$strand2 ||= '.';
808
809	return Bio::SeqFeature::FeaturePair->new( -primary       => $name,
810	                                          -seq_id        => $chr1,
811	                                          -start         => $start1,
812	                                          -end           => $end1,
813	                                          -strand        => $strand_translate{$strand1},
814
815	                                          -hprimary_tag  => $name,
816	                                          -hseqname      => $chr2,
817	                                          -hstart        => $start2,
818	                                          -hend          => $end2,
819	                                          -hstrand       => $strand_translate{$strand2},
820
821	                                          -score         => $score
822	                                        );
823}
824
825=head2 _validate_file_input()
826
827 Title   : _validate_file_input
828 Usage   : $bedtools_fac->_validate_file_input( -type => $file )
829 Function: validate file type for file spec
830 Returns : file type if valid type for file spec
831 Args    : hash of filespec => file_name
832
833=cut
834
835sub _validate_file_input {
836	my ($self, @args) = @_;
837	my (%args);
838	if (grep (/^-/, @args) && (@args > 1)) { # named parms
839		$self->throw("Wrong number of args - requires one named arg") if (@args > 2);
840		s/^-// for @args;
841		%args = @args;
842	} else {
843		$self->throw("Must provide named filespec");
844	}
845
846	for (keys %args) {
847		m/bam/ && do {
848			return 'bam';
849		};
850		do {
851			return unless ( -e $args{$_} && -r _ );
852			my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$args{$_});
853			return $guesser->guess if grep {$guesser->guess =~ m/$_/} @{$accepted_types{$_}};
854		}
855	}
856	return;
857}
858
859=head2 version()
860
861 Title   : version
862 Usage   : $version = $bedtools_fac->version()
863 Function: Returns the program version (if available)
864 Returns : string representing location and version of the program
865
866=cut
867
868sub version{
869	my ($self) = @_;
870
871	my $cmd = $self->command if $self->can('command');
872
873	defined $cmd or $self->throw("No command defined - cannot determine program executable");
874
875	# new bahaviour for some BEDTools executables - breaks previous approach to getting version
876	# $dummy can be any non-recognised parameter - '-version' works for most
877	my $dummy = '-version';
878	$dummy = '-examples' if $cmd =~ /graph_union/;
879
880	my ($in, $out, $err);
881	my $dum;
882	$in = \$dum;
883	$out = \$self->{'stdout'};
884	$err = \$self->{'stderr'};
885
886	# Get program executable
887	my $exe = $self->executable;
888
889	my @ipc_args = ( $exe, $dummy );
890
891	eval {
892		IPC::Run::run(\@ipc_args, $in, $out, $err) or
893			die ("There was a problem running $exe : $!");
894	};
895	# We don't bother trying to catch this: version is returned as an illegal file seek
896
897	my @details = split("\n",$self->stderr);
898	(my $version) = grep /^Program: .*$/, @details;
899	$version =~ s/^Program: //;
900
901	return $version;
902}
903
904sub available_commands { shift->available_parameters('commands') };
905
906sub filespec { shift->available_parameters('filespec') };
907
9081;
909