1package Bio::Root::IO;
2$Bio::Root::IO::VERSION = '1.7.7';
3use strict;
4use Symbol;
5use IO::Handle;
6use File::Copy;
7use Fcntl;
8use base qw(Bio::Root::Root);
9
10# as of 2016, worked on most systems, but will test this in a RC
11my %modes = ( 0 => 'r', 1 => 'w', 2 => 'rw' );
12
13=head1 NAME
14
15Bio::Root::IO - BioPerl base IO handling class
16
17=head1 SYNOPSIS
18
19    # Use stream I/O in your module
20    $self->{'io'} = Bio::Root::IO->new(-file => "myfile");
21    $self->{'io'}->_print("some stuff");
22    my $line = $self->{'io'}->_readline();
23    $self->{'io'}->_pushback($line);
24    $self->{'io'}->close();
25
26    # obtain platform-compatible filenames
27    $path = Bio::Root::IO->catfile($dir, $subdir, $filename);
28    # obtain a temporary file (created in $TEMPDIR)
29    ($handle) = $io->tempfile();
30
31=head1 DESCRIPTION
32
33This module provides methods that will usually be needed for any sort
34of file- or stream-related input/output, e.g., keeping track of a file
35handle, transient printing and reading from the file handle, a close
36method, automatically closing the handle on garbage collection, etc.
37
38To use this for your own code you will either want to inherit from
39this module, or instantiate an object for every file or stream you are
40dealing with. In the first case this module will most likely not be
41the first class off which your class inherits; therefore you need to
42call _initialize_io() with the named parameters in order to set file
43handle, open file, etc automatically.
44
45Most methods start with an underscore, indicating they are private. In
46OO speak, they are not private but protected, that is, use them in
47your module code, but a client code of your module will usually not
48want to call them (except those not starting with an underscore).
49
50In addition this module contains a couple of convenience methods for
51cross-platform safe tempfile creation and similar tasks. There are
52some CPAN modules related that may not be available on all
53platforms. At present, File::Spec and File::Temp are attempted. This
54module defines $PATHSEP, $TEMPDIR, and $ROOTDIR, which will always be set,
55and $OPENFLAGS, which will be set if either of File::Spec or File::Temp fails.
56
57The -noclose boolean (accessed via the noclose method) prevents a
58filehandle from being closed when the IO object is cleaned up.  This
59is special behavior when a object like a parser might share a
60filehandle with an object like an indexer where it is not proper to
61close the filehandle as it will continue to be reused until the end of the
62stream is reached.  In general you won't want to play with this flag.
63
64=head1 AUTHOR Hilmar Lapp
65
66=cut
67
68our ($FILESPECLOADED,   $FILETEMPLOADED,
69     $FILEPATHLOADED,   $TEMPDIR,
70     $PATHSEP,          $ROOTDIR,
71     $OPENFLAGS,        $VERBOSE,
72     $ONMAC,            $HAS_EOL,       );
73
74my $TEMPCOUNTER;
75my $HAS_WIN32 = 0;
76
77BEGIN {
78    $TEMPCOUNTER = 0;
79    $FILESPECLOADED = 0;
80    $FILETEMPLOADED = 0;
81    $FILEPATHLOADED = 0;
82    $VERBOSE = 0;
83
84    # try to load those modules that may cause trouble on some systems
85    eval {
86        require File::Path;
87        $FILEPATHLOADED = 1;
88    };
89    if( $@ ) {
90        print STDERR "Cannot load File::Path: $@" if( $VERBOSE > 0 );
91        # do nothing
92    }
93
94    # If on Win32, attempt to find Win32 package
95    if($^O =~ /mswin/i) {
96        eval {
97            require Win32;
98            $HAS_WIN32 = 1;
99        };
100    }
101
102    # Try to provide a path separator. Why doesn't File::Spec export this,
103    # or did I miss it?
104    if ($^O =~ /mswin/i) {
105        $PATHSEP = "\\";
106    } elsif($^O =~ /macos/i) {
107        $PATHSEP = ":";
108    } else { # unix
109        $PATHSEP = "/";
110    }
111    eval {
112        require File::Spec;
113        $FILESPECLOADED = 1;
114        $TEMPDIR = File::Spec->tmpdir();
115        $ROOTDIR = File::Spec->rootdir();
116        require File::Temp; # tempfile creation
117        $FILETEMPLOADED = 1;
118    };
119    if( $@ ) {
120        if(! defined($TEMPDIR)) { # File::Spec failed
121            # determine tempdir
122            if (defined $ENV{'TEMPDIR'} && -d $ENV{'TEMPDIR'} ) {
123                $TEMPDIR = $ENV{'TEMPDIR'};
124            } elsif( defined $ENV{'TMPDIR'} && -d $ENV{'TMPDIR'} ) {
125                $TEMPDIR = $ENV{'TMPDIR'};
126            }
127            if($^O =~ /mswin/i) {
128                $TEMPDIR = 'C:\TEMP' unless $TEMPDIR;
129                $ROOTDIR = 'C:';
130            } elsif($^O =~ /macos/i) {
131                $TEMPDIR = "" unless $TEMPDIR; # what is a reasonable default on Macs?
132                $ROOTDIR = ""; # what is reasonable??
133            } else { # unix
134                $TEMPDIR = "/tmp" unless $TEMPDIR;
135                $ROOTDIR = "/";
136            }
137            if (!( -d $TEMPDIR && -w $TEMPDIR )) {
138                $TEMPDIR = '.'; # last resort
139            }
140        }
141        # File::Temp failed (alone, or File::Spec already failed)
142        # determine open flags for tempfile creation using Fcntl
143        $OPENFLAGS = O_CREAT | O_EXCL | O_RDWR;
144        for my $oflag (qw/FOLLOW BINARY LARGEFILE EXLOCK NOINHERIT TEMPORARY/){
145            my ($bit, $func) = (0, "Fcntl::O_" . $oflag);
146            no strict 'refs';
147            $OPENFLAGS |= $bit if eval { $bit = &$func(); 1 };
148        }
149    }
150    $ONMAC = "\015" eq "\n";
151}
152
153
154=head2 new
155
156 Title   : new
157 Usage   : my $io = Bio::Root::IO->new( -file => 'data.txt' );
158 Function: Create new class instance. It automatically calls C<_initialize_io>.
159 Args    : Same named parameters as C<_initialize_io>.
160 Returns : A Bio::Root::IO object
161
162=cut
163
164sub new {
165    my ($caller, @args) = @_;
166    my $self = $caller->SUPER::new(@args);
167    $self->_initialize_io(@args);
168    return $self;
169}
170
171
172=head2 _initialize_io
173
174 Title   : _initialize_io
175 Usage   : $io->_initialize_io(@params);
176 Function: Initializes filehandle and other properties from the parameters.
177 Args    : The following named parameters are currently recognized:
178              -file     name of file to read or write to
179              -fh       file handle to read or write to (mutually exclusive
180                        with -file and -string)
181              -input    name of file, or filehandle (GLOB or IO::Handle object)
182                        to read of write to
183              -string   string to read from (will be converted to filehandle)
184              -url      name of URL to open
185              -flush    boolean flag to autoflush after each write
186              -noclose  boolean flag, when set to true will not close a
187                        filehandle (must explicitly call close($io->_fh)
188              -retries  number of times to try a web fetch before failure
189              -ua_parms when using -url, hashref of key => value parameters
190                        to pass to LWP::UserAgent->new(). A useful value might
191                        be, for example, {timeout => 60 } (ua defaults to 180s)
192 Returns : True
193
194=cut
195
196sub _initialize_io {
197    my($self, @args) = @_;
198
199    $self->_register_for_cleanup(\&_io_cleanup);
200
201    my ($input, $noclose, $file, $fh, $string,
202        $flush, $url, $retries, $ua_parms) =
203        $self->_rearrange([qw(INPUT NOCLOSE FILE FH STRING FLUSH URL RETRIES UA_PARMS)],
204                          @args);
205
206    my $mode;
207
208    if ($url) {
209        $retries ||= 5;
210
211        require LWP::UserAgent;
212        my $ua = LWP::UserAgent->new(%$ua_parms);
213        my $http_result;
214        my ($handle, $tempfile) = $self->tempfile();
215        CORE::close($handle);
216
217        for (my $try = 1 ; $try <= $retries ; $try++) {
218            $http_result = $ua->get($url, ':content_file' => $tempfile);
219            $self->warn("[$try/$retries] tried to fetch $url, but server ".
220                        "threw ". $http_result->code . ".  retrying...")
221              if !$http_result->is_success;
222            last if $http_result->is_success;
223        }
224        $self->throw("Failed to fetch $url, server threw ".$http_result->code)
225          if !$http_result->is_success;
226
227        $file = $tempfile;
228        $mode = '>';
229    }
230
231    delete $self->{'_readbuffer'};
232    delete $self->{'_filehandle'};
233    $self->noclose( $noclose) if defined $noclose;
234    # determine whether the input is a file(name) or a stream
235    if ($input) {
236        if (ref(\$input) eq 'SCALAR') {
237            # we assume that a scalar is a filename
238            if ($file && ($file ne $input)) {
239                $self->throw("Input file given twice: '$file' and '$input' disagree");
240            }
241            $file = $input;
242        } elsif (ref($input) &&
243            ((ref($input) eq 'GLOB') || $input->isa('IO::Handle'))) {
244            # input is a stream
245            $fh = $input;
246        } else {
247            # let's be strict for now
248            $self->throw("Unable to determine type of input $input: ".
249                         "not string and not GLOB");
250        }
251    }
252
253    if (defined($file) && defined($fh)) {
254        $self->throw("Providing both a file and a filehandle for reading - ".
255                     "only one please!");
256    }
257
258    if ($string) {
259        if (defined($file) || defined($fh)) {
260            $self->throw("File or filehandle provided with -string, ".
261                         "please unset if you are using -string as a file");
262        }
263        open $fh, '<', \$string or $self->throw("Could not read string: $!");
264    }
265
266    if (defined($file) && ($file ne '')) {
267        $self->file($file);
268        ($mode, $file) = $self->cleanfile;
269        $mode ||= '<';
270        my $action = ($mode =~ m/>/) ? 'write' : 'read';
271        $fh = Symbol::gensym();
272        open $fh, $mode, $file or $self->throw("Could not $action file '$file': $!");
273    }
274
275    if (defined $fh) {
276        # check filehandle to ensure it's one of:
277        # a GLOB reference, as in: open(my $fh, "myfile");
278        # an IO::Handle or IO::String object
279        # the UNIVERSAL::can added to fix Bug2863
280        unless (   ( ref $fh and ( ref $fh eq 'GLOB' ) )
281                or ( ref $fh and ( UNIVERSAL::can( $fh, 'can' ) )
282                             and (   $fh->isa('IO::Handle')
283                                  or $fh->isa('IO::String') ) )
284               ) {
285            $self->throw("Object $fh does not appear to be a file handle");
286        }
287        if ($HAS_EOL) {
288            binmode $fh, ':raw:eol(LF-Native)';
289        }
290        $self->_fh($fh); # if $fh not provided, defaults to STDIN and STDOUT
291    }
292
293    $self->_flush_on_write(defined $flush ? $flush : 1);
294
295    return 1;
296}
297
298
299=head2 _fh
300
301 Title   : _fh
302 Usage   : $io->_fh($newval);
303 Function: Get or set the file handle for the stream encapsulated.
304 Args    : Optional filehandle to use
305 Returns : Filehandle for the stream
306
307=cut
308
309sub _fh {
310    my ($self, $value) = @_;
311    if ( defined $value) {
312        $self->{'_filehandle'} = $value;
313    }
314    return $self->{'_filehandle'};
315}
316
317
318=head2 mode
319
320 Title   : mode
321 Usage   : $io->mode();
322           $io->mode(-force => 1);
323 Function: Determine if the object was opened for reading or writing
324 Args    : -force: Boolean. Once mode() has been called, the mode is cached for
325                   further calls to mode(). Use this argument to override this
326                   behavior and re-check the object's mode.
327 Returns : Mode of the object:
328            'r'  for readable
329            'w'  for writable
330            'rw' for readable and writable
331            '?'  if mode could not be determined (e.g. for a -url)
332
333=cut
334
335sub mode {
336    my ($self, %arg) = @_;
337
338    # Method 1: IO::Handle::fdopen
339    #    my $iotest = new IO::Handle;
340    #    $iotest->fdopen( dup(fileno($fh)) , 'r' );
341    #    if ($iotest->error == 0) { ... }
342    # It did not actually seem to work under any platform, since there would no
343    # error if the filehandle had been opened writable only. It could not be
344    # hacked around when dealing with unseekable (piped) filehandles.
345
346    # Method 2: readline, a.k.a. the <> operator
347    #    no warnings "io";
348    #    my $line = <$fh>;
349    #    if (defined $line) {
350    #       $self->{'_mode'} = 'r';
351    #    ...
352    # It did not work well either because <> returns undef, i.e. querying the
353    # mode() after having read an entire file returned 'w'.
354
355    if ( $arg{-force} || not exists $self->{'_mode'} ) {
356        # Determine stream mode
357        my $mode;
358        my $fh = $self->_fh;
359        if (defined $fh) {
360            # use fcntl if not Windows-based
361            if ($^O !~ /MSWin32/) {
362                my $m = fcntl($fh, F_GETFL, 0) || 0;
363                $mode = exists $modes{$m & 3}  ? $modes{$m & 3} : '?';
364            } else {
365                # Determine read/write status of filehandle
366                no warnings 'io';
367                if ( defined( read $fh, my $content, 0 ) ) {
368                    # Successfully read 0 bytes
369                    $mode = 'r'
370                }
371                if ( defined( syswrite $fh, '') ) {
372                    # Successfully wrote 0 bytes
373                    $mode ||= '';
374                    $mode  .= 'w';
375                }
376            }
377        } else {
378           # Stream does not have a filehandle... cannot determine mode
379           $mode = '?';
380        }
381        # Save mode for future use
382        $self->{'_mode'} = $mode;
383    }
384    return $self->{'_mode'};
385}
386
387
388=head2 file
389
390 Title   : file
391 Usage   : $io->file('>'.$file);
392           my $file = $io->file;
393 Function: Get or set the name of the file to read or write.
394 Args    : Optional file name (including its mode, e.g. '<' for reading or '>'
395           for writing)
396 Returns : A string representing the filename and its mode.
397
398=cut
399
400sub file {
401    my ($self, $value) = @_;
402    if ( defined $value) {
403        $self->{'_file'} = $value;
404    }
405    return $self->{'_file'};
406}
407
408
409=head2 cleanfile
410
411 Title   : cleanfile
412 Usage   : my ($mode, $file) = $io->cleanfile;
413 Function: Get the name of the file to read or write, stripped of its mode
414           ('>', '<', '+>', '>>', etc).
415 Args    : None
416 Returns : In array context, an array of the mode and the clean filename.
417
418=cut
419
420sub cleanfile {
421    my ($self) = @_;
422    return ($self->{'_file'} =~ m/^ (\+?[><]{1,2})?\s*(.*) $/x);
423}
424
425
426=head2 format
427
428 Title   : format
429 Usage   : $io->format($newval)
430 Function: Get the format of a Bio::Root::IO sequence file or filehandle. Every
431           object inheriting Bio::Root::IO is guaranteed to have a format.
432 Args    : None
433 Returns : Format of the file or filehandle, e.g. fasta, fastq, genbank, embl.
434
435=cut
436
437sub format {
438    my ($self) = @_;
439    my $format = (split '::', ref($self))[-1];
440    return $format;
441}
442
443
444=head2 variant
445
446 Title   : format
447 Usage   : $io->format($newval)
448 Function: Get the variant of a Bio::Root::IO sequence file or filehandle.
449           The format variant depends on the specific format used. Note that
450           not all formats have variants. Also, the Bio::Root::IO-implementing
451           modules that require access to variants need to define a global hash
452           that has the allowed variants as its keys.
453 Args    : None
454 Returns : Variant of the file or filehandle, e.g. sanger, solexa or illumina for
455           the fastq format, or undef for formats that do not have variants.
456
457=cut
458
459sub variant {
460    my ($self, $variant) = @_;
461    if (defined $variant) {
462        $variant = lc $variant;
463        my $var_name = '%'.ref($self).'::variant';
464        my %ok_variants = eval $var_name; # e.g. %Bio::Assembly::IO::ace::variant
465        if (scalar keys %ok_variants == 0) {
466            $self->throw("Could not validate variant because global variant ".
467                         "$var_name was not set or was empty\n");
468        }
469        if (not exists $ok_variants{$variant}) {
470            $self->throw("$variant is not a valid variant of the " .
471                         $self->format . ' format');
472        }
473        $self->{variant} = $variant;
474    }
475    return $self->{variant};
476}
477
478
479=head2 _print
480
481 Title   : _print
482 Usage   : $io->_print(@lines)
483 Function: Print lines of text to the IO stream object.
484 Args    : List of strings to print
485 Returns : True on success, undef on failure
486
487=cut
488
489sub _print {
490    my $self = shift;
491    my $fh = $self->_fh() || \*STDOUT;
492    my $ret = print $fh @_;
493    return $ret;
494}
495
496
497=head2 _insert
498
499 Title   : _insert
500 Usage   : $io->_insert($string,1)
501 Function: Insert some text in a file at the given line number (1-based).
502 Args    : * string to write in file
503           * line number to insert the string at
504 Returns : True
505
506=cut
507
508sub _insert {
509    my ($self, $string, $line_num) = @_;
510    # Line number check
511    if ($line_num < 1) {
512        $self->throw("Could not insert text at line $line_num: the minimum ".
513                     "line number possible is 1.");
514    }
515    # File check
516    my ($mode, $file) = $self->cleanfile;
517    if (not defined $file) {
518        $self->throw('Could not insert a line: IO object was initialized with '.
519                     'something else than a file.');
520    }
521    # Everything that needs to be written is written before we read it
522    $self->flush;
523
524    # Edit the file line by line (no slurping)
525    $self->close;
526    my $temp_file;
527    my $number = 0;
528    while (-e "$file.$number.temp") {
529        $number++;
530    }
531    $temp_file = "$file.$number.temp";
532    copy($file, $temp_file);
533    open my $fh1, '<', $temp_file or $self->throw("Could not read temporary file '$temp_file': $!");
534    open my $fh2, '>', $file      or $self->throw("Could not write file '$file': $!");
535    while (my $line = <$fh1>) {
536        if ($. == $line_num) { # right line for new data
537            print $fh2 $string . $line;
538        }
539        else {
540            print $fh2 $line;
541        }
542    }
543    CORE::close $fh1;
544    CORE::close $fh2;
545    unlink $temp_file or $self->throw("Could not delete temporary file '$temp_file': $!");
546
547    # Line number check (again)
548    if ( $. > 0 && $line_num > $. ) {
549        $self->throw("Could not insert text at line $line_num: there are only ".
550                     "$. lines in file '$file'");
551    }
552    # Re-open the file in append mode to be ready to add text at the end of it
553    # when the next _print() statement comes
554    open my $new_fh, '>>', $file or $self->throw("Could not append to file '$file': $!");
555    $self->_fh($new_fh);
556    # If file is empty and we're inserting at line 1, simply append text to file
557    if ( $. == 0 && $line_num == 1 ) {
558        $self->_print($string);
559    }
560    return 1;
561}
562
563
564=head2 _readline
565
566 Title   : _readline
567 Usage   : local $Bio::Root::IO::HAS_EOL = 1;
568           my $io = Bio::Root::IO->new(-file => 'data.txt');
569           my $line = $io->_readline();
570           $io->close;
571 Function: Read a line of input and normalize all end of line characters.
572
573           End of line characters are typically "\n" on Linux platforms, "\r\n"
574           on Windows and "\r" on older Mac OS. By default, the _readline()
575           method uses the value of $/, Perl's input record separator, to
576           detect the end of each line. This means that you will not get the
577           expected lines if your input has Mac-formatted end of line characters.
578           Also, note that the current implementation does not handle pushed
579           back input correctly unless the pushed back input ends with the
580           value of $/. For each line parsed, its line ending, e.g. "\r\n" is
581           converted to "\n", unless you provide the -raw argument.
582
583           Altogether it is easier to let the PerlIO::eol module automatically
584           detect the proper end of line character and normalize it to "\n". Do
585           so by setting $Bio::Root::IO::HAS_EOL to 1.
586
587 Args    : -raw : Avoid converting end of line characters to "\n" This option
588                  has no effect when using $Bio::Root::IO::HAS_EOL = 1.
589 Returns : Line of input, or undef when there is nothing to read anymore
590
591=cut
592
593sub _readline {
594    my ($self, %param) = @_;
595    my $fh = $self->_fh or return;
596    my $line;
597
598    # if the buffer been filled by _pushback then return the buffer
599    # contents, rather than read from the filehandle
600    if( @{$self->{'_readbuffer'} || [] } ) {
601        $line = shift @{$self->{'_readbuffer'}};
602    } else {
603        $line = <$fh>;
604    }
605
606    # Note: In Windows the "-raw" parameter has no effect, because Perl already discards
607    # the '\r' from the line when reading in text mode from the filehandle
608    # ($line = <$fh>), and put it back automatically when printing
609    if( !$HAS_EOL && !$param{-raw} && (defined $line) ) {
610        # don't strip line endings if -raw or $HAS_EOL is specified
611        $line =~ s/\015\012/\012/g;         # Change all CR/LF pairs to LF
612        $line =~ tr/\015/\n/ unless $ONMAC; # Change all single CRs to NEWLINE
613    }
614    return $line;
615}
616
617
618=head2 _pushback
619
620 Title   : _pushback
621 Usage   : $io->_pushback($newvalue)
622 Function: Puts a line previously read with _readline back into a buffer.
623           buffer can hold as many lines as system memory permits.
624
625           Note that this is only supported for pushing back data ending with
626           the current, localized value of $/. Using this method to push
627           modified data back onto the buffer stack is not supported; see bug
628           843.
629
630 Args    : newvalue
631 Returns : True
632
633=cut
634
635# fix for bug 843, this reveals some unsupported behavior
636
637#sub _pushback {
638#    my ($self, $value) = @_;
639#    if (index($value, $/) >= 0) {
640#        push @{$self->{'_readbuffer'}}, $value;
641#    } else {
642#        $self->throw("Pushing modifed data back not supported: $value");
643#    }
644#}
645
646sub _pushback {
647    my ($self, $value) = @_;
648    return unless $value;
649    unshift @{$self->{'_readbuffer'}}, $value;
650    return 1;
651}
652
653
654=head2 close
655
656 Title   : close
657 Usage   : $io->close()
658 Function: Closes the file handle associated with this IO instance,
659           excepted if -noclose was specified.
660 Args    : None
661 Returns : True
662
663=cut
664
665sub close {
666    my ($self) = @_;
667
668    # do not close if we explicitly asked not to
669    return if $self->noclose;
670
671    if( defined( my $fh = $self->{'_filehandle'} )) {
672        $self->flush;
673        return if ref $fh eq 'GLOB' && (
674            \*STDOUT == $fh || \*STDERR == $fh || \*STDIN  == $fh
675        );
676
677        # don't close IO::Strings
678        CORE::close $fh unless ref $fh && $fh->isa('IO::String');
679    }
680    $self->{'_filehandle'} = undef;
681    delete $self->{'_readbuffer'};
682    return 1;
683}
684
685
686=head2 flush
687
688 Title   : flush
689 Usage   : $io->flush()
690 Function: Flushes the filehandle
691 Args    : None
692 Returns : True
693
694=cut
695
696sub flush {
697    my ($self) = shift;
698
699    if( !defined $self->{'_filehandle'} ) {
700        $self->throw("Flush failed: no filehandle was active");
701    }
702
703    if( ref($self->{'_filehandle'}) =~ /GLOB/ ) {
704        my $oldh = select($self->{'_filehandle'});
705        $| = 1;
706        select($oldh);
707    } else {
708        $self->{'_filehandle'}->flush();
709    }
710    return 1;
711}
712
713
714=head2 noclose
715
716 Title   : noclose
717 Usage   : $io->noclose($newval)
718 Function: Get or set the NOCLOSE flag - setting this to true will prevent a
719           filehandle from being closed when an object is cleaned up or
720           explicitly closed.
721 Args    : Optional new value (a scalar or undef)
722 Returns : Value of noclose (a scalar)
723
724=cut
725
726sub noclose {
727    my $self = shift;
728    return $self->{'_noclose'} = shift if @_;
729    return $self->{'_noclose'};
730}
731
732
733=head2 _io_cleanup
734
735=cut
736
737sub _io_cleanup {
738    my ($self) = @_;
739    $self->close();
740    my $v = $self->verbose;
741
742    # we are planning to cleanup temp files no matter what
743    if (    exists($self->{'_rootio_tempfiles'})
744        and ref($self->{'_rootio_tempfiles'}) =~ /array/i
745        and not $self->save_tempfiles
746        ) {
747        if( $v > 0 ) {
748            warn( "going to remove files ",
749                  join(",",  @{$self->{'_rootio_tempfiles'}}),
750                  "\n");
751        }
752        unlink  (@{$self->{'_rootio_tempfiles'}} );
753    }
754    # cleanup if we are not using File::Temp
755    if (    $self->{'_cleanuptempdir'}
756        and exists($self->{'_rootio_tempdirs'})
757        and ref($self->{'_rootio_tempdirs'}) =~ /array/i
758        and not $self->save_tempfiles
759        ) {
760        if( $v > 0 ) {
761            warn( "going to remove dirs ",
762                  join(",",  @{$self->{'_rootio_tempdirs'}}),
763                  "\n");
764        }
765        $self->rmtree( $self->{'_rootio_tempdirs'});
766    }
767}
768
769
770=head2 exists_exe
771
772 Title   : exists_exe
773 Usage   : $exists = $io->exists_exe('clustalw');
774           $exists = Bio::Root::IO->exists_exe('clustalw')
775           $exists = Bio::Root::IO::exists_exe('clustalw')
776 Function: Determines whether the given executable exists either as file
777           or within the path environment. The latter requires File::Spec
778           to be installed.
779           On Win32-based system, .exe is automatically appended to the program
780           name unless the program name already ends in .exe.
781 Args    : Name of the executable
782 Returns : 1 if the given program is callable as an executable, and 0 otherwise
783
784=cut
785
786sub exists_exe {
787    my ($self, $exe) = @_;
788    $self->throw("Must pass a defined value to exists_exe") unless defined $exe;
789    $exe = $self if (!(ref($self) || $exe));
790    $exe .= '.exe' if(($^O =~ /mswin/i) && ($exe !~ /\.(exe|com|bat|cmd)$/i));
791    return $exe if ( -f $exe && -x $exe ); # full path and exists
792
793    # Ewan's comment. I don't think we need this. People should not be
794    # asking for a program with a pathseparator starting it
795    # $exe =~ s/^$PATHSEP//;
796
797    # Not a full path, or does not exist. Let's see whether it's in the path.
798    if($FILESPECLOADED) {
799        for my $dir (File::Spec->path()) {
800            my $f = Bio::Root::IO->catfile($dir, $exe);
801            return $f if( -f $f && -x $f );
802        }
803    }
804    return 0;
805}
806
807
808=head2 tempfile
809
810 Title   : tempfile
811 Usage   : my ($handle,$tempfile) = $io->tempfile();
812 Function: Create a temporary filename and a handle opened for reading and
813           writing.
814           Caveats: If you do not have File::Temp on your system you should
815           avoid specifying TEMPLATE and SUFFIX.
816 Args    : Named parameters compatible with File::Temp: DIR (defaults to
817           $Bio::Root::IO::TEMPDIR), TEMPLATE, SUFFIX.
818 Returns : A 2-element array, consisting of temporary handle and temporary
819           file name.
820
821=cut
822
823sub tempfile {
824    my ($self, @args) = @_;
825    my ($tfh, $file);
826    my %params = @args;
827
828    # map between naming with and without dash
829    for my $key (keys(%params)) {
830        if( $key =~ /^-/  ) {
831            my $v = $params{$key};
832            delete $params{$key};
833            $params{uc(substr($key,1))} = $v;
834        } else {
835            # this is to upper case
836            my $v = $params{$key};
837            delete $params{$key};
838            $params{uc($key)} = $v;
839        }
840    }
841    $params{'DIR'} = $TEMPDIR if(! exists($params{'DIR'}));
842    unless (exists $params{'UNLINK'} &&
843            defined $params{'UNLINK'} &&
844            ! $params{'UNLINK'} ) {
845        $params{'UNLINK'} = 1;
846    } else {
847        $params{'UNLINK'} = 0;
848    }
849
850    if($FILETEMPLOADED) {
851        if(exists($params{'TEMPLATE'})) {
852            my $template = $params{'TEMPLATE'};
853            delete $params{'TEMPLATE'};
854            ($tfh, $file) = File::Temp::tempfile($template, %params);
855        } else {
856            ($tfh, $file) = File::Temp::tempfile(%params);
857        }
858    } else {
859        my $dir = $params{'DIR'};
860        $file = $self->catfile(
861            $dir,
862            (exists($params{'TEMPLATE'}) ?
863             $params{'TEMPLATE'} :
864             sprintf( "%s.%s.%s", $ENV{USER} || 'unknown', $$, $TEMPCOUNTER++))
865        );
866
867        # sneakiness for getting around long filenames on Win32?
868        if( $HAS_WIN32 ) {
869            $file = Win32::GetShortPathName($file);
870        }
871
872        # Try to make sure this will be marked close-on-exec
873        # XXX: Win32 doesn't respect this, nor the proper fcntl,
874        #      but may have O_NOINHERIT. This may or may not be in Fcntl.
875        local $^F = 2;
876        # Store callers umask
877        my $umask = umask();
878        # Set a known umaskr
879        umask(066);
880        # Attempt to open the file
881        if ( sysopen($tfh, $file, $OPENFLAGS, 0600) ) {
882            # Reset umask
883            umask($umask);
884        } else {
885            $self->throw("Could not write temporary file '$file': $!");
886        }
887    }
888
889    if(  $params{'UNLINK'} ) {
890        push @{$self->{'_rootio_tempfiles'}}, $file;
891    }
892
893    return wantarray ? ($tfh,$file) : $tfh;
894}
895
896
897=head2  tempdir
898
899 Title   : tempdir
900 Usage   : my ($tempdir) = $io->tempdir(CLEANUP=>1);
901 Function: Creates and returns the name of a new temporary directory.
902
903           Note that you should not use this function for obtaining "the"
904           temp directory. Use $Bio::Root::IO::TEMPDIR for that. Calling this
905           method will in fact create a new directory.
906
907 Args    : args - ( key CLEANUP ) indicates whether or not to cleanup
908           dir on object destruction, other keys as specified by File::Temp
909 Returns : The name of a new temporary directory.
910
911=cut
912
913sub tempdir {
914    my ($self, @args) = @_;
915    if ($FILETEMPLOADED && File::Temp->can('tempdir')) {
916        return File::Temp::tempdir(@args);
917    }
918
919    # we have to do this ourselves, not good
920    # we are planning to cleanup temp files no matter what
921    my %params = @args;
922    print "cleanup is " . $params{CLEANUP} . "\n";
923    $self->{'_cleanuptempdir'} = ( defined $params{CLEANUP} &&
924                                   $params{CLEANUP} == 1);
925    my $tdir = $self->catfile( $TEMPDIR,
926                               sprintf("dir_%s-%s-%s",
927                                       $ENV{USER} || 'unknown',
928                                       $$,
929                                       $TEMPCOUNTER++));
930    mkdir($tdir, 0755);
931    push @{$self->{'_rootio_tempdirs'}}, $tdir;
932    return $tdir;
933}
934
935
936=head2 catfile
937
938 Title   : catfile
939 Usage   : $path = Bio::Root::IO->catfile(@dirs, $filename);
940 Function: Constructs a full pathname in a cross-platform safe way.
941
942           If File::Spec exists on your system, this routine will merely
943           delegate to it. Otherwise it tries to make a good guess.
944
945           You should use this method whenever you construct a path name
946           from directory and filename. Otherwise you risk cross-platform
947           compatibility of your code.
948
949           You can call this method both as a class and an instance method.
950
951 Args    : components of the pathname (directories and filename, NOT an
952           extension)
953 Returns : a string
954
955=cut
956
957sub catfile {
958    my ($self, @args) = @_;
959
960    return File::Spec->catfile(@args) if $FILESPECLOADED;
961    # this is clumsy and not very appealing, but how do we specify the
962    # root directory?
963    if($args[0] eq '/') {
964        $args[0] = $ROOTDIR;
965    }
966    return join($PATHSEP, @args);
967}
968
969
970=head2 rmtree
971
972 Title   : rmtree
973 Usage   : Bio::Root::IO->rmtree($dirname );
974 Function: Remove a full directory tree
975
976           If File::Path exists on your system, this routine will merely
977           delegate to it. Otherwise it runs a local version of that code.
978
979           You should use this method to remove directories which contain
980           files.
981
982           You can call this method both as a class and an instance method.
983
984 Args    : roots - rootdir to delete or reference to list of dirs
985
986           verbose - a boolean value, which if TRUE will cause
987                     C<rmtree> to print a message each time it
988                     examines a file, giving the name of the file, and
989                     indicating whether it's using C<rmdir> or
990                     C<unlink> to remove it, or that it's skipping it.
991                     (defaults to FALSE)
992
993           safe - a boolean value, which if TRUE will cause C<rmtree>
994                  to skip any files to which you do not have delete
995                  access (if running under VMS) or write access (if
996                  running under another OS).  This will change in the
997                  future when a criterion for 'delete permission'
998                  under OSs other than VMS is settled.  (defaults to
999                  FALSE)
1000 Returns : number of files successfully deleted
1001
1002=cut
1003
1004# taken straight from File::Path VERSION = "1.0403"
1005sub rmtree {
1006    my ($self, $roots, $verbose, $safe) = @_;
1007    if ( $FILEPATHLOADED ) {
1008        return File::Path::rmtree ($roots, $verbose, $safe);
1009    }
1010
1011    my $force_writable = ($^O eq 'os2' || $^O eq 'dos' || $^O eq 'MSWin32' ||
1012                          $^O eq 'amigaos' || $^O eq 'cygwin');
1013    my $Is_VMS = $^O eq 'VMS';
1014
1015    my @files;
1016    my $count = 0;
1017    $verbose ||= 0;
1018    $safe    ||= 0;
1019    if ( defined($roots) && length($roots) ) {
1020        $roots = [$roots] unless ref $roots;
1021    } else {
1022        $self->warn("No root path(s) specified\n");
1023        return 0;
1024    }
1025
1026    my $root;
1027    for $root (@{$roots}) {
1028        $root =~ s#/\z##;
1029        (undef, undef, my $rp) = lstat $root or next;
1030        $rp &= 07777;   # don't forget setuid, setgid, sticky bits
1031        if ( -d _ ) {
1032            # notabene: 0777 is for making readable in the first place,
1033            # it's also intended to change it to writable in case we have
1034            # to recurse in which case we are better than rm -rf for
1035            # subtrees with strange permissions
1036            chmod(0777, ($Is_VMS ? VMS::Filespec::fileify($root) : $root))
1037              or $self->warn("Could not make directory '$root' read+writable: $!")
1038            unless $safe;
1039            if (opendir DIR, $root){
1040                @files = readdir DIR;
1041                closedir DIR;
1042            } else {
1043                $self->warn("Could not read directory '$root': $!");
1044                @files = ();
1045            }
1046
1047            # Deleting large numbers of files from VMS Files-11 filesystems
1048            # is faster if done in reverse ASCIIbetical order
1049            @files = reverse @files if $Is_VMS;
1050            ($root = VMS::Filespec::unixify($root)) =~ s#\.dir\z## if $Is_VMS;
1051            @files = map("$root/$_", grep $_!~/^\.{1,2}\z/s,@files);
1052            $count += $self->rmtree([@files],$verbose,$safe);
1053            if ($safe &&
1054              ($Is_VMS ? !&VMS::Filespec::candelete($root) : !-w $root)) {
1055                print "skipped '$root'\n" if $verbose;
1056                next;
1057            }
1058            chmod 0777, $root
1059              or $self->warn("Could not make directory '$root' writable: $!")
1060              if $force_writable;
1061            print "rmdir '$root'\n" if $verbose;
1062            if (rmdir $root) {
1063                ++$count;
1064            }
1065            else {
1066                $self->warn("Could not remove directory '$root': $!");
1067                chmod($rp, ($Is_VMS ? VMS::Filespec::fileify($root) : $root))
1068                  or $self->warn("and can't restore permissions to "
1069                                 . sprintf("0%o",$rp) . "\n");
1070            }
1071        }
1072        else {
1073            if (     $safe
1074                and ($Is_VMS ? !&VMS::Filespec::candelete($root)
1075                             : !(-l $root || -w $root))
1076                ) {
1077                print "skipped '$root'\n" if $verbose;
1078                next;
1079            }
1080            chmod 0666, $root
1081              or $self->warn( "Could not make file '$root' writable: $!")
1082              if $force_writable;
1083            warn "unlink '$root'\n" if $verbose;
1084            # delete all versions under VMS
1085            for (;;) {
1086                unless (unlink $root) {
1087                    $self->warn("Could not unlink file '$root': $!");
1088                    if ($force_writable) {
1089                        chmod $rp, $root
1090                          or $self->warn("and can't restore permissions to "
1091                                         . sprintf("0%o",$rp) . "\n");
1092                    }
1093                    last;
1094                }
1095                ++$count;
1096                last unless $Is_VMS && lstat $root;
1097            }
1098        }
1099    }
1100
1101    return $count;
1102}
1103
1104
1105=head2 _flush_on_write
1106
1107 Title   : _flush_on_write
1108 Usage   : $io->_flush_on_write($newval)
1109 Function: Boolean flag to indicate whether to flush
1110           the filehandle on writing when the end of
1111           a component is finished (Sequences, Alignments, etc)
1112 Args    : Optional new value
1113 Returns : Value of _flush_on_write
1114
1115=cut
1116
1117sub _flush_on_write {
1118    my ($self, $value) = @_;
1119    if (defined $value) {
1120        $self->{'_flush_on_write'} = $value;
1121    }
1122    return $self->{'_flush_on_write'};
1123}
1124
1125
1126=head2 save_tempfiles
1127
1128 Title   : save_tempfiles
1129 Usage   : $io->save_tempfiles(1)
1130 Function: Boolean flag to indicate whether to retain tempfiles/tempdir
1131 Args    : Value evaluating to TRUE or FALSE
1132 Returns : Boolean value : 1 = save tempfiles/tempdirs, 0 = remove (default)
1133
1134=cut
1135
1136sub save_tempfiles {
1137    my $self = shift;
1138    if (@_) {
1139        my $value = shift;
1140        $self->{save_tempfiles} = $value ? 1 : 0;
1141    }
1142    return $self->{save_tempfiles} || 0;
1143}
1144
1145
11461;
1147