1package Bio::Root::IO; 2$Bio::Root::IO::VERSION = '1.7.7'; 3use strict; 4use Symbol; 5use IO::Handle; 6use File::Copy; 7use Fcntl; 8use base qw(Bio::Root::Root); 9 10# as of 2016, worked on most systems, but will test this in a RC 11my %modes = ( 0 => 'r', 1 => 'w', 2 => 'rw' ); 12 13=head1 NAME 14 15Bio::Root::IO - BioPerl base IO handling class 16 17=head1 SYNOPSIS 18 19 # Use stream I/O in your module 20 $self->{'io'} = Bio::Root::IO->new(-file => "myfile"); 21 $self->{'io'}->_print("some stuff"); 22 my $line = $self->{'io'}->_readline(); 23 $self->{'io'}->_pushback($line); 24 $self->{'io'}->close(); 25 26 # obtain platform-compatible filenames 27 $path = Bio::Root::IO->catfile($dir, $subdir, $filename); 28 # obtain a temporary file (created in $TEMPDIR) 29 ($handle) = $io->tempfile(); 30 31=head1 DESCRIPTION 32 33This module provides methods that will usually be needed for any sort 34of file- or stream-related input/output, e.g., keeping track of a file 35handle, transient printing and reading from the file handle, a close 36method, automatically closing the handle on garbage collection, etc. 37 38To use this for your own code you will either want to inherit from 39this module, or instantiate an object for every file or stream you are 40dealing with. In the first case this module will most likely not be 41the first class off which your class inherits; therefore you need to 42call _initialize_io() with the named parameters in order to set file 43handle, open file, etc automatically. 44 45Most methods start with an underscore, indicating they are private. In 46OO speak, they are not private but protected, that is, use them in 47your module code, but a client code of your module will usually not 48want to call them (except those not starting with an underscore). 49 50In addition this module contains a couple of convenience methods for 51cross-platform safe tempfile creation and similar tasks. There are 52some CPAN modules related that may not be available on all 53platforms. At present, File::Spec and File::Temp are attempted. This 54module defines $PATHSEP, $TEMPDIR, and $ROOTDIR, which will always be set, 55and $OPENFLAGS, which will be set if either of File::Spec or File::Temp fails. 56 57The -noclose boolean (accessed via the noclose method) prevents a 58filehandle from being closed when the IO object is cleaned up. This 59is special behavior when a object like a parser might share a 60filehandle with an object like an indexer where it is not proper to 61close the filehandle as it will continue to be reused until the end of the 62stream is reached. In general you won't want to play with this flag. 63 64=head1 AUTHOR Hilmar Lapp 65 66=cut 67 68our ($FILESPECLOADED, $FILETEMPLOADED, 69 $FILEPATHLOADED, $TEMPDIR, 70 $PATHSEP, $ROOTDIR, 71 $OPENFLAGS, $VERBOSE, 72 $ONMAC, $HAS_EOL, ); 73 74my $TEMPCOUNTER; 75my $HAS_WIN32 = 0; 76 77BEGIN { 78 $TEMPCOUNTER = 0; 79 $FILESPECLOADED = 0; 80 $FILETEMPLOADED = 0; 81 $FILEPATHLOADED = 0; 82 $VERBOSE = 0; 83 84 # try to load those modules that may cause trouble on some systems 85 eval { 86 require File::Path; 87 $FILEPATHLOADED = 1; 88 }; 89 if( $@ ) { 90 print STDERR "Cannot load File::Path: $@" if( $VERBOSE > 0 ); 91 # do nothing 92 } 93 94 # If on Win32, attempt to find Win32 package 95 if($^O =~ /mswin/i) { 96 eval { 97 require Win32; 98 $HAS_WIN32 = 1; 99 }; 100 } 101 102 # Try to provide a path separator. Why doesn't File::Spec export this, 103 # or did I miss it? 104 if ($^O =~ /mswin/i) { 105 $PATHSEP = "\\"; 106 } elsif($^O =~ /macos/i) { 107 $PATHSEP = ":"; 108 } else { # unix 109 $PATHSEP = "/"; 110 } 111 eval { 112 require File::Spec; 113 $FILESPECLOADED = 1; 114 $TEMPDIR = File::Spec->tmpdir(); 115 $ROOTDIR = File::Spec->rootdir(); 116 require File::Temp; # tempfile creation 117 $FILETEMPLOADED = 1; 118 }; 119 if( $@ ) { 120 if(! defined($TEMPDIR)) { # File::Spec failed 121 # determine tempdir 122 if (defined $ENV{'TEMPDIR'} && -d $ENV{'TEMPDIR'} ) { 123 $TEMPDIR = $ENV{'TEMPDIR'}; 124 } elsif( defined $ENV{'TMPDIR'} && -d $ENV{'TMPDIR'} ) { 125 $TEMPDIR = $ENV{'TMPDIR'}; 126 } 127 if($^O =~ /mswin/i) { 128 $TEMPDIR = 'C:\TEMP' unless $TEMPDIR; 129 $ROOTDIR = 'C:'; 130 } elsif($^O =~ /macos/i) { 131 $TEMPDIR = "" unless $TEMPDIR; # what is a reasonable default on Macs? 132 $ROOTDIR = ""; # what is reasonable?? 133 } else { # unix 134 $TEMPDIR = "/tmp" unless $TEMPDIR; 135 $ROOTDIR = "/"; 136 } 137 if (!( -d $TEMPDIR && -w $TEMPDIR )) { 138 $TEMPDIR = '.'; # last resort 139 } 140 } 141 # File::Temp failed (alone, or File::Spec already failed) 142 # determine open flags for tempfile creation using Fcntl 143 $OPENFLAGS = O_CREAT | O_EXCL | O_RDWR; 144 for my $oflag (qw/FOLLOW BINARY LARGEFILE EXLOCK NOINHERIT TEMPORARY/){ 145 my ($bit, $func) = (0, "Fcntl::O_" . $oflag); 146 no strict 'refs'; 147 $OPENFLAGS |= $bit if eval { $bit = &$func(); 1 }; 148 } 149 } 150 $ONMAC = "\015" eq "\n"; 151} 152 153 154=head2 new 155 156 Title : new 157 Usage : my $io = Bio::Root::IO->new( -file => 'data.txt' ); 158 Function: Create new class instance. It automatically calls C<_initialize_io>. 159 Args : Same named parameters as C<_initialize_io>. 160 Returns : A Bio::Root::IO object 161 162=cut 163 164sub new { 165 my ($caller, @args) = @_; 166 my $self = $caller->SUPER::new(@args); 167 $self->_initialize_io(@args); 168 return $self; 169} 170 171 172=head2 _initialize_io 173 174 Title : _initialize_io 175 Usage : $io->_initialize_io(@params); 176 Function: Initializes filehandle and other properties from the parameters. 177 Args : The following named parameters are currently recognized: 178 -file name of file to read or write to 179 -fh file handle to read or write to (mutually exclusive 180 with -file and -string) 181 -input name of file, or filehandle (GLOB or IO::Handle object) 182 to read of write to 183 -string string to read from (will be converted to filehandle) 184 -url name of URL to open 185 -flush boolean flag to autoflush after each write 186 -noclose boolean flag, when set to true will not close a 187 filehandle (must explicitly call close($io->_fh) 188 -retries number of times to try a web fetch before failure 189 -ua_parms when using -url, hashref of key => value parameters 190 to pass to LWP::UserAgent->new(). A useful value might 191 be, for example, {timeout => 60 } (ua defaults to 180s) 192 Returns : True 193 194=cut 195 196sub _initialize_io { 197 my($self, @args) = @_; 198 199 $self->_register_for_cleanup(\&_io_cleanup); 200 201 my ($input, $noclose, $file, $fh, $string, 202 $flush, $url, $retries, $ua_parms) = 203 $self->_rearrange([qw(INPUT NOCLOSE FILE FH STRING FLUSH URL RETRIES UA_PARMS)], 204 @args); 205 206 my $mode; 207 208 if ($url) { 209 $retries ||= 5; 210 211 require LWP::UserAgent; 212 my $ua = LWP::UserAgent->new(%$ua_parms); 213 my $http_result; 214 my ($handle, $tempfile) = $self->tempfile(); 215 CORE::close($handle); 216 217 for (my $try = 1 ; $try <= $retries ; $try++) { 218 $http_result = $ua->get($url, ':content_file' => $tempfile); 219 $self->warn("[$try/$retries] tried to fetch $url, but server ". 220 "threw ". $http_result->code . ". retrying...") 221 if !$http_result->is_success; 222 last if $http_result->is_success; 223 } 224 $self->throw("Failed to fetch $url, server threw ".$http_result->code) 225 if !$http_result->is_success; 226 227 $file = $tempfile; 228 $mode = '>'; 229 } 230 231 delete $self->{'_readbuffer'}; 232 delete $self->{'_filehandle'}; 233 $self->noclose( $noclose) if defined $noclose; 234 # determine whether the input is a file(name) or a stream 235 if ($input) { 236 if (ref(\$input) eq 'SCALAR') { 237 # we assume that a scalar is a filename 238 if ($file && ($file ne $input)) { 239 $self->throw("Input file given twice: '$file' and '$input' disagree"); 240 } 241 $file = $input; 242 } elsif (ref($input) && 243 ((ref($input) eq 'GLOB') || $input->isa('IO::Handle'))) { 244 # input is a stream 245 $fh = $input; 246 } else { 247 # let's be strict for now 248 $self->throw("Unable to determine type of input $input: ". 249 "not string and not GLOB"); 250 } 251 } 252 253 if (defined($file) && defined($fh)) { 254 $self->throw("Providing both a file and a filehandle for reading - ". 255 "only one please!"); 256 } 257 258 if ($string) { 259 if (defined($file) || defined($fh)) { 260 $self->throw("File or filehandle provided with -string, ". 261 "please unset if you are using -string as a file"); 262 } 263 open $fh, '<', \$string or $self->throw("Could not read string: $!"); 264 } 265 266 if (defined($file) && ($file ne '')) { 267 $self->file($file); 268 ($mode, $file) = $self->cleanfile; 269 $mode ||= '<'; 270 my $action = ($mode =~ m/>/) ? 'write' : 'read'; 271 $fh = Symbol::gensym(); 272 open $fh, $mode, $file or $self->throw("Could not $action file '$file': $!"); 273 } 274 275 if (defined $fh) { 276 # check filehandle to ensure it's one of: 277 # a GLOB reference, as in: open(my $fh, "myfile"); 278 # an IO::Handle or IO::String object 279 # the UNIVERSAL::can added to fix Bug2863 280 unless ( ( ref $fh and ( ref $fh eq 'GLOB' ) ) 281 or ( ref $fh and ( UNIVERSAL::can( $fh, 'can' ) ) 282 and ( $fh->isa('IO::Handle') 283 or $fh->isa('IO::String') ) ) 284 ) { 285 $self->throw("Object $fh does not appear to be a file handle"); 286 } 287 if ($HAS_EOL) { 288 binmode $fh, ':raw:eol(LF-Native)'; 289 } 290 $self->_fh($fh); # if $fh not provided, defaults to STDIN and STDOUT 291 } 292 293 $self->_flush_on_write(defined $flush ? $flush : 1); 294 295 return 1; 296} 297 298 299=head2 _fh 300 301 Title : _fh 302 Usage : $io->_fh($newval); 303 Function: Get or set the file handle for the stream encapsulated. 304 Args : Optional filehandle to use 305 Returns : Filehandle for the stream 306 307=cut 308 309sub _fh { 310 my ($self, $value) = @_; 311 if ( defined $value) { 312 $self->{'_filehandle'} = $value; 313 } 314 return $self->{'_filehandle'}; 315} 316 317 318=head2 mode 319 320 Title : mode 321 Usage : $io->mode(); 322 $io->mode(-force => 1); 323 Function: Determine if the object was opened for reading or writing 324 Args : -force: Boolean. Once mode() has been called, the mode is cached for 325 further calls to mode(). Use this argument to override this 326 behavior and re-check the object's mode. 327 Returns : Mode of the object: 328 'r' for readable 329 'w' for writable 330 'rw' for readable and writable 331 '?' if mode could not be determined (e.g. for a -url) 332 333=cut 334 335sub mode { 336 my ($self, %arg) = @_; 337 338 # Method 1: IO::Handle::fdopen 339 # my $iotest = new IO::Handle; 340 # $iotest->fdopen( dup(fileno($fh)) , 'r' ); 341 # if ($iotest->error == 0) { ... } 342 # It did not actually seem to work under any platform, since there would no 343 # error if the filehandle had been opened writable only. It could not be 344 # hacked around when dealing with unseekable (piped) filehandles. 345 346 # Method 2: readline, a.k.a. the <> operator 347 # no warnings "io"; 348 # my $line = <$fh>; 349 # if (defined $line) { 350 # $self->{'_mode'} = 'r'; 351 # ... 352 # It did not work well either because <> returns undef, i.e. querying the 353 # mode() after having read an entire file returned 'w'. 354 355 if ( $arg{-force} || not exists $self->{'_mode'} ) { 356 # Determine stream mode 357 my $mode; 358 my $fh = $self->_fh; 359 if (defined $fh) { 360 # use fcntl if not Windows-based 361 if ($^O !~ /MSWin32/) { 362 my $m = fcntl($fh, F_GETFL, 0) || 0; 363 $mode = exists $modes{$m & 3} ? $modes{$m & 3} : '?'; 364 } else { 365 # Determine read/write status of filehandle 366 no warnings 'io'; 367 if ( defined( read $fh, my $content, 0 ) ) { 368 # Successfully read 0 bytes 369 $mode = 'r' 370 } 371 if ( defined( syswrite $fh, '') ) { 372 # Successfully wrote 0 bytes 373 $mode ||= ''; 374 $mode .= 'w'; 375 } 376 } 377 } else { 378 # Stream does not have a filehandle... cannot determine mode 379 $mode = '?'; 380 } 381 # Save mode for future use 382 $self->{'_mode'} = $mode; 383 } 384 return $self->{'_mode'}; 385} 386 387 388=head2 file 389 390 Title : file 391 Usage : $io->file('>'.$file); 392 my $file = $io->file; 393 Function: Get or set the name of the file to read or write. 394 Args : Optional file name (including its mode, e.g. '<' for reading or '>' 395 for writing) 396 Returns : A string representing the filename and its mode. 397 398=cut 399 400sub file { 401 my ($self, $value) = @_; 402 if ( defined $value) { 403 $self->{'_file'} = $value; 404 } 405 return $self->{'_file'}; 406} 407 408 409=head2 cleanfile 410 411 Title : cleanfile 412 Usage : my ($mode, $file) = $io->cleanfile; 413 Function: Get the name of the file to read or write, stripped of its mode 414 ('>', '<', '+>', '>>', etc). 415 Args : None 416 Returns : In array context, an array of the mode and the clean filename. 417 418=cut 419 420sub cleanfile { 421 my ($self) = @_; 422 return ($self->{'_file'} =~ m/^ (\+?[><]{1,2})?\s*(.*) $/x); 423} 424 425 426=head2 format 427 428 Title : format 429 Usage : $io->format($newval) 430 Function: Get the format of a Bio::Root::IO sequence file or filehandle. Every 431 object inheriting Bio::Root::IO is guaranteed to have a format. 432 Args : None 433 Returns : Format of the file or filehandle, e.g. fasta, fastq, genbank, embl. 434 435=cut 436 437sub format { 438 my ($self) = @_; 439 my $format = (split '::', ref($self))[-1]; 440 return $format; 441} 442 443 444=head2 variant 445 446 Title : format 447 Usage : $io->format($newval) 448 Function: Get the variant of a Bio::Root::IO sequence file or filehandle. 449 The format variant depends on the specific format used. Note that 450 not all formats have variants. Also, the Bio::Root::IO-implementing 451 modules that require access to variants need to define a global hash 452 that has the allowed variants as its keys. 453 Args : None 454 Returns : Variant of the file or filehandle, e.g. sanger, solexa or illumina for 455 the fastq format, or undef for formats that do not have variants. 456 457=cut 458 459sub variant { 460 my ($self, $variant) = @_; 461 if (defined $variant) { 462 $variant = lc $variant; 463 my $var_name = '%'.ref($self).'::variant'; 464 my %ok_variants = eval $var_name; # e.g. %Bio::Assembly::IO::ace::variant 465 if (scalar keys %ok_variants == 0) { 466 $self->throw("Could not validate variant because global variant ". 467 "$var_name was not set or was empty\n"); 468 } 469 if (not exists $ok_variants{$variant}) { 470 $self->throw("$variant is not a valid variant of the " . 471 $self->format . ' format'); 472 } 473 $self->{variant} = $variant; 474 } 475 return $self->{variant}; 476} 477 478 479=head2 _print 480 481 Title : _print 482 Usage : $io->_print(@lines) 483 Function: Print lines of text to the IO stream object. 484 Args : List of strings to print 485 Returns : True on success, undef on failure 486 487=cut 488 489sub _print { 490 my $self = shift; 491 my $fh = $self->_fh() || \*STDOUT; 492 my $ret = print $fh @_; 493 return $ret; 494} 495 496 497=head2 _insert 498 499 Title : _insert 500 Usage : $io->_insert($string,1) 501 Function: Insert some text in a file at the given line number (1-based). 502 Args : * string to write in file 503 * line number to insert the string at 504 Returns : True 505 506=cut 507 508sub _insert { 509 my ($self, $string, $line_num) = @_; 510 # Line number check 511 if ($line_num < 1) { 512 $self->throw("Could not insert text at line $line_num: the minimum ". 513 "line number possible is 1."); 514 } 515 # File check 516 my ($mode, $file) = $self->cleanfile; 517 if (not defined $file) { 518 $self->throw('Could not insert a line: IO object was initialized with '. 519 'something else than a file.'); 520 } 521 # Everything that needs to be written is written before we read it 522 $self->flush; 523 524 # Edit the file line by line (no slurping) 525 $self->close; 526 my $temp_file; 527 my $number = 0; 528 while (-e "$file.$number.temp") { 529 $number++; 530 } 531 $temp_file = "$file.$number.temp"; 532 copy($file, $temp_file); 533 open my $fh1, '<', $temp_file or $self->throw("Could not read temporary file '$temp_file': $!"); 534 open my $fh2, '>', $file or $self->throw("Could not write file '$file': $!"); 535 while (my $line = <$fh1>) { 536 if ($. == $line_num) { # right line for new data 537 print $fh2 $string . $line; 538 } 539 else { 540 print $fh2 $line; 541 } 542 } 543 CORE::close $fh1; 544 CORE::close $fh2; 545 unlink $temp_file or $self->throw("Could not delete temporary file '$temp_file': $!"); 546 547 # Line number check (again) 548 if ( $. > 0 && $line_num > $. ) { 549 $self->throw("Could not insert text at line $line_num: there are only ". 550 "$. lines in file '$file'"); 551 } 552 # Re-open the file in append mode to be ready to add text at the end of it 553 # when the next _print() statement comes 554 open my $new_fh, '>>', $file or $self->throw("Could not append to file '$file': $!"); 555 $self->_fh($new_fh); 556 # If file is empty and we're inserting at line 1, simply append text to file 557 if ( $. == 0 && $line_num == 1 ) { 558 $self->_print($string); 559 } 560 return 1; 561} 562 563 564=head2 _readline 565 566 Title : _readline 567 Usage : local $Bio::Root::IO::HAS_EOL = 1; 568 my $io = Bio::Root::IO->new(-file => 'data.txt'); 569 my $line = $io->_readline(); 570 $io->close; 571 Function: Read a line of input and normalize all end of line characters. 572 573 End of line characters are typically "\n" on Linux platforms, "\r\n" 574 on Windows and "\r" on older Mac OS. By default, the _readline() 575 method uses the value of $/, Perl's input record separator, to 576 detect the end of each line. This means that you will not get the 577 expected lines if your input has Mac-formatted end of line characters. 578 Also, note that the current implementation does not handle pushed 579 back input correctly unless the pushed back input ends with the 580 value of $/. For each line parsed, its line ending, e.g. "\r\n" is 581 converted to "\n", unless you provide the -raw argument. 582 583 Altogether it is easier to let the PerlIO::eol module automatically 584 detect the proper end of line character and normalize it to "\n". Do 585 so by setting $Bio::Root::IO::HAS_EOL to 1. 586 587 Args : -raw : Avoid converting end of line characters to "\n" This option 588 has no effect when using $Bio::Root::IO::HAS_EOL = 1. 589 Returns : Line of input, or undef when there is nothing to read anymore 590 591=cut 592 593sub _readline { 594 my ($self, %param) = @_; 595 my $fh = $self->_fh or return; 596 my $line; 597 598 # if the buffer been filled by _pushback then return the buffer 599 # contents, rather than read from the filehandle 600 if( @{$self->{'_readbuffer'} || [] } ) { 601 $line = shift @{$self->{'_readbuffer'}}; 602 } else { 603 $line = <$fh>; 604 } 605 606 # Note: In Windows the "-raw" parameter has no effect, because Perl already discards 607 # the '\r' from the line when reading in text mode from the filehandle 608 # ($line = <$fh>), and put it back automatically when printing 609 if( !$HAS_EOL && !$param{-raw} && (defined $line) ) { 610 # don't strip line endings if -raw or $HAS_EOL is specified 611 $line =~ s/\015\012/\012/g; # Change all CR/LF pairs to LF 612 $line =~ tr/\015/\n/ unless $ONMAC; # Change all single CRs to NEWLINE 613 } 614 return $line; 615} 616 617 618=head2 _pushback 619 620 Title : _pushback 621 Usage : $io->_pushback($newvalue) 622 Function: Puts a line previously read with _readline back into a buffer. 623 buffer can hold as many lines as system memory permits. 624 625 Note that this is only supported for pushing back data ending with 626 the current, localized value of $/. Using this method to push 627 modified data back onto the buffer stack is not supported; see bug 628 843. 629 630 Args : newvalue 631 Returns : True 632 633=cut 634 635# fix for bug 843, this reveals some unsupported behavior 636 637#sub _pushback { 638# my ($self, $value) = @_; 639# if (index($value, $/) >= 0) { 640# push @{$self->{'_readbuffer'}}, $value; 641# } else { 642# $self->throw("Pushing modifed data back not supported: $value"); 643# } 644#} 645 646sub _pushback { 647 my ($self, $value) = @_; 648 return unless $value; 649 unshift @{$self->{'_readbuffer'}}, $value; 650 return 1; 651} 652 653 654=head2 close 655 656 Title : close 657 Usage : $io->close() 658 Function: Closes the file handle associated with this IO instance, 659 excepted if -noclose was specified. 660 Args : None 661 Returns : True 662 663=cut 664 665sub close { 666 my ($self) = @_; 667 668 # do not close if we explicitly asked not to 669 return if $self->noclose; 670 671 if( defined( my $fh = $self->{'_filehandle'} )) { 672 $self->flush; 673 return if ref $fh eq 'GLOB' && ( 674 \*STDOUT == $fh || \*STDERR == $fh || \*STDIN == $fh 675 ); 676 677 # don't close IO::Strings 678 CORE::close $fh unless ref $fh && $fh->isa('IO::String'); 679 } 680 $self->{'_filehandle'} = undef; 681 delete $self->{'_readbuffer'}; 682 return 1; 683} 684 685 686=head2 flush 687 688 Title : flush 689 Usage : $io->flush() 690 Function: Flushes the filehandle 691 Args : None 692 Returns : True 693 694=cut 695 696sub flush { 697 my ($self) = shift; 698 699 if( !defined $self->{'_filehandle'} ) { 700 $self->throw("Flush failed: no filehandle was active"); 701 } 702 703 if( ref($self->{'_filehandle'}) =~ /GLOB/ ) { 704 my $oldh = select($self->{'_filehandle'}); 705 $| = 1; 706 select($oldh); 707 } else { 708 $self->{'_filehandle'}->flush(); 709 } 710 return 1; 711} 712 713 714=head2 noclose 715 716 Title : noclose 717 Usage : $io->noclose($newval) 718 Function: Get or set the NOCLOSE flag - setting this to true will prevent a 719 filehandle from being closed when an object is cleaned up or 720 explicitly closed. 721 Args : Optional new value (a scalar or undef) 722 Returns : Value of noclose (a scalar) 723 724=cut 725 726sub noclose { 727 my $self = shift; 728 return $self->{'_noclose'} = shift if @_; 729 return $self->{'_noclose'}; 730} 731 732 733=head2 _io_cleanup 734 735=cut 736 737sub _io_cleanup { 738 my ($self) = @_; 739 $self->close(); 740 my $v = $self->verbose; 741 742 # we are planning to cleanup temp files no matter what 743 if ( exists($self->{'_rootio_tempfiles'}) 744 and ref($self->{'_rootio_tempfiles'}) =~ /array/i 745 and not $self->save_tempfiles 746 ) { 747 if( $v > 0 ) { 748 warn( "going to remove files ", 749 join(",", @{$self->{'_rootio_tempfiles'}}), 750 "\n"); 751 } 752 unlink (@{$self->{'_rootio_tempfiles'}} ); 753 } 754 # cleanup if we are not using File::Temp 755 if ( $self->{'_cleanuptempdir'} 756 and exists($self->{'_rootio_tempdirs'}) 757 and ref($self->{'_rootio_tempdirs'}) =~ /array/i 758 and not $self->save_tempfiles 759 ) { 760 if( $v > 0 ) { 761 warn( "going to remove dirs ", 762 join(",", @{$self->{'_rootio_tempdirs'}}), 763 "\n"); 764 } 765 $self->rmtree( $self->{'_rootio_tempdirs'}); 766 } 767} 768 769 770=head2 exists_exe 771 772 Title : exists_exe 773 Usage : $exists = $io->exists_exe('clustalw'); 774 $exists = Bio::Root::IO->exists_exe('clustalw') 775 $exists = Bio::Root::IO::exists_exe('clustalw') 776 Function: Determines whether the given executable exists either as file 777 or within the path environment. The latter requires File::Spec 778 to be installed. 779 On Win32-based system, .exe is automatically appended to the program 780 name unless the program name already ends in .exe. 781 Args : Name of the executable 782 Returns : 1 if the given program is callable as an executable, and 0 otherwise 783 784=cut 785 786sub exists_exe { 787 my ($self, $exe) = @_; 788 $self->throw("Must pass a defined value to exists_exe") unless defined $exe; 789 $exe = $self if (!(ref($self) || $exe)); 790 $exe .= '.exe' if(($^O =~ /mswin/i) && ($exe !~ /\.(exe|com|bat|cmd)$/i)); 791 return $exe if ( -f $exe && -x $exe ); # full path and exists 792 793 # Ewan's comment. I don't think we need this. People should not be 794 # asking for a program with a pathseparator starting it 795 # $exe =~ s/^$PATHSEP//; 796 797 # Not a full path, or does not exist. Let's see whether it's in the path. 798 if($FILESPECLOADED) { 799 for my $dir (File::Spec->path()) { 800 my $f = Bio::Root::IO->catfile($dir, $exe); 801 return $f if( -f $f && -x $f ); 802 } 803 } 804 return 0; 805} 806 807 808=head2 tempfile 809 810 Title : tempfile 811 Usage : my ($handle,$tempfile) = $io->tempfile(); 812 Function: Create a temporary filename and a handle opened for reading and 813 writing. 814 Caveats: If you do not have File::Temp on your system you should 815 avoid specifying TEMPLATE and SUFFIX. 816 Args : Named parameters compatible with File::Temp: DIR (defaults to 817 $Bio::Root::IO::TEMPDIR), TEMPLATE, SUFFIX. 818 Returns : A 2-element array, consisting of temporary handle and temporary 819 file name. 820 821=cut 822 823sub tempfile { 824 my ($self, @args) = @_; 825 my ($tfh, $file); 826 my %params = @args; 827 828 # map between naming with and without dash 829 for my $key (keys(%params)) { 830 if( $key =~ /^-/ ) { 831 my $v = $params{$key}; 832 delete $params{$key}; 833 $params{uc(substr($key,1))} = $v; 834 } else { 835 # this is to upper case 836 my $v = $params{$key}; 837 delete $params{$key}; 838 $params{uc($key)} = $v; 839 } 840 } 841 $params{'DIR'} = $TEMPDIR if(! exists($params{'DIR'})); 842 unless (exists $params{'UNLINK'} && 843 defined $params{'UNLINK'} && 844 ! $params{'UNLINK'} ) { 845 $params{'UNLINK'} = 1; 846 } else { 847 $params{'UNLINK'} = 0; 848 } 849 850 if($FILETEMPLOADED) { 851 if(exists($params{'TEMPLATE'})) { 852 my $template = $params{'TEMPLATE'}; 853 delete $params{'TEMPLATE'}; 854 ($tfh, $file) = File::Temp::tempfile($template, %params); 855 } else { 856 ($tfh, $file) = File::Temp::tempfile(%params); 857 } 858 } else { 859 my $dir = $params{'DIR'}; 860 $file = $self->catfile( 861 $dir, 862 (exists($params{'TEMPLATE'}) ? 863 $params{'TEMPLATE'} : 864 sprintf( "%s.%s.%s", $ENV{USER} || 'unknown', $$, $TEMPCOUNTER++)) 865 ); 866 867 # sneakiness for getting around long filenames on Win32? 868 if( $HAS_WIN32 ) { 869 $file = Win32::GetShortPathName($file); 870 } 871 872 # Try to make sure this will be marked close-on-exec 873 # XXX: Win32 doesn't respect this, nor the proper fcntl, 874 # but may have O_NOINHERIT. This may or may not be in Fcntl. 875 local $^F = 2; 876 # Store callers umask 877 my $umask = umask(); 878 # Set a known umaskr 879 umask(066); 880 # Attempt to open the file 881 if ( sysopen($tfh, $file, $OPENFLAGS, 0600) ) { 882 # Reset umask 883 umask($umask); 884 } else { 885 $self->throw("Could not write temporary file '$file': $!"); 886 } 887 } 888 889 if( $params{'UNLINK'} ) { 890 push @{$self->{'_rootio_tempfiles'}}, $file; 891 } 892 893 return wantarray ? ($tfh,$file) : $tfh; 894} 895 896 897=head2 tempdir 898 899 Title : tempdir 900 Usage : my ($tempdir) = $io->tempdir(CLEANUP=>1); 901 Function: Creates and returns the name of a new temporary directory. 902 903 Note that you should not use this function for obtaining "the" 904 temp directory. Use $Bio::Root::IO::TEMPDIR for that. Calling this 905 method will in fact create a new directory. 906 907 Args : args - ( key CLEANUP ) indicates whether or not to cleanup 908 dir on object destruction, other keys as specified by File::Temp 909 Returns : The name of a new temporary directory. 910 911=cut 912 913sub tempdir { 914 my ($self, @args) = @_; 915 if ($FILETEMPLOADED && File::Temp->can('tempdir')) { 916 return File::Temp::tempdir(@args); 917 } 918 919 # we have to do this ourselves, not good 920 # we are planning to cleanup temp files no matter what 921 my %params = @args; 922 print "cleanup is " . $params{CLEANUP} . "\n"; 923 $self->{'_cleanuptempdir'} = ( defined $params{CLEANUP} && 924 $params{CLEANUP} == 1); 925 my $tdir = $self->catfile( $TEMPDIR, 926 sprintf("dir_%s-%s-%s", 927 $ENV{USER} || 'unknown', 928 $$, 929 $TEMPCOUNTER++)); 930 mkdir($tdir, 0755); 931 push @{$self->{'_rootio_tempdirs'}}, $tdir; 932 return $tdir; 933} 934 935 936=head2 catfile 937 938 Title : catfile 939 Usage : $path = Bio::Root::IO->catfile(@dirs, $filename); 940 Function: Constructs a full pathname in a cross-platform safe way. 941 942 If File::Spec exists on your system, this routine will merely 943 delegate to it. Otherwise it tries to make a good guess. 944 945 You should use this method whenever you construct a path name 946 from directory and filename. Otherwise you risk cross-platform 947 compatibility of your code. 948 949 You can call this method both as a class and an instance method. 950 951 Args : components of the pathname (directories and filename, NOT an 952 extension) 953 Returns : a string 954 955=cut 956 957sub catfile { 958 my ($self, @args) = @_; 959 960 return File::Spec->catfile(@args) if $FILESPECLOADED; 961 # this is clumsy and not very appealing, but how do we specify the 962 # root directory? 963 if($args[0] eq '/') { 964 $args[0] = $ROOTDIR; 965 } 966 return join($PATHSEP, @args); 967} 968 969 970=head2 rmtree 971 972 Title : rmtree 973 Usage : Bio::Root::IO->rmtree($dirname ); 974 Function: Remove a full directory tree 975 976 If File::Path exists on your system, this routine will merely 977 delegate to it. Otherwise it runs a local version of that code. 978 979 You should use this method to remove directories which contain 980 files. 981 982 You can call this method both as a class and an instance method. 983 984 Args : roots - rootdir to delete or reference to list of dirs 985 986 verbose - a boolean value, which if TRUE will cause 987 C<rmtree> to print a message each time it 988 examines a file, giving the name of the file, and 989 indicating whether it's using C<rmdir> or 990 C<unlink> to remove it, or that it's skipping it. 991 (defaults to FALSE) 992 993 safe - a boolean value, which if TRUE will cause C<rmtree> 994 to skip any files to which you do not have delete 995 access (if running under VMS) or write access (if 996 running under another OS). This will change in the 997 future when a criterion for 'delete permission' 998 under OSs other than VMS is settled. (defaults to 999 FALSE) 1000 Returns : number of files successfully deleted 1001 1002=cut 1003 1004# taken straight from File::Path VERSION = "1.0403" 1005sub rmtree { 1006 my ($self, $roots, $verbose, $safe) = @_; 1007 if ( $FILEPATHLOADED ) { 1008 return File::Path::rmtree ($roots, $verbose, $safe); 1009 } 1010 1011 my $force_writable = ($^O eq 'os2' || $^O eq 'dos' || $^O eq 'MSWin32' || 1012 $^O eq 'amigaos' || $^O eq 'cygwin'); 1013 my $Is_VMS = $^O eq 'VMS'; 1014 1015 my @files; 1016 my $count = 0; 1017 $verbose ||= 0; 1018 $safe ||= 0; 1019 if ( defined($roots) && length($roots) ) { 1020 $roots = [$roots] unless ref $roots; 1021 } else { 1022 $self->warn("No root path(s) specified\n"); 1023 return 0; 1024 } 1025 1026 my $root; 1027 for $root (@{$roots}) { 1028 $root =~ s#/\z##; 1029 (undef, undef, my $rp) = lstat $root or next; 1030 $rp &= 07777; # don't forget setuid, setgid, sticky bits 1031 if ( -d _ ) { 1032 # notabene: 0777 is for making readable in the first place, 1033 # it's also intended to change it to writable in case we have 1034 # to recurse in which case we are better than rm -rf for 1035 # subtrees with strange permissions 1036 chmod(0777, ($Is_VMS ? VMS::Filespec::fileify($root) : $root)) 1037 or $self->warn("Could not make directory '$root' read+writable: $!") 1038 unless $safe; 1039 if (opendir DIR, $root){ 1040 @files = readdir DIR; 1041 closedir DIR; 1042 } else { 1043 $self->warn("Could not read directory '$root': $!"); 1044 @files = (); 1045 } 1046 1047 # Deleting large numbers of files from VMS Files-11 filesystems 1048 # is faster if done in reverse ASCIIbetical order 1049 @files = reverse @files if $Is_VMS; 1050 ($root = VMS::Filespec::unixify($root)) =~ s#\.dir\z## if $Is_VMS; 1051 @files = map("$root/$_", grep $_!~/^\.{1,2}\z/s,@files); 1052 $count += $self->rmtree([@files],$verbose,$safe); 1053 if ($safe && 1054 ($Is_VMS ? !&VMS::Filespec::candelete($root) : !-w $root)) { 1055 print "skipped '$root'\n" if $verbose; 1056 next; 1057 } 1058 chmod 0777, $root 1059 or $self->warn("Could not make directory '$root' writable: $!") 1060 if $force_writable; 1061 print "rmdir '$root'\n" if $verbose; 1062 if (rmdir $root) { 1063 ++$count; 1064 } 1065 else { 1066 $self->warn("Could not remove directory '$root': $!"); 1067 chmod($rp, ($Is_VMS ? VMS::Filespec::fileify($root) : $root)) 1068 or $self->warn("and can't restore permissions to " 1069 . sprintf("0%o",$rp) . "\n"); 1070 } 1071 } 1072 else { 1073 if ( $safe 1074 and ($Is_VMS ? !&VMS::Filespec::candelete($root) 1075 : !(-l $root || -w $root)) 1076 ) { 1077 print "skipped '$root'\n" if $verbose; 1078 next; 1079 } 1080 chmod 0666, $root 1081 or $self->warn( "Could not make file '$root' writable: $!") 1082 if $force_writable; 1083 warn "unlink '$root'\n" if $verbose; 1084 # delete all versions under VMS 1085 for (;;) { 1086 unless (unlink $root) { 1087 $self->warn("Could not unlink file '$root': $!"); 1088 if ($force_writable) { 1089 chmod $rp, $root 1090 or $self->warn("and can't restore permissions to " 1091 . sprintf("0%o",$rp) . "\n"); 1092 } 1093 last; 1094 } 1095 ++$count; 1096 last unless $Is_VMS && lstat $root; 1097 } 1098 } 1099 } 1100 1101 return $count; 1102} 1103 1104 1105=head2 _flush_on_write 1106 1107 Title : _flush_on_write 1108 Usage : $io->_flush_on_write($newval) 1109 Function: Boolean flag to indicate whether to flush 1110 the filehandle on writing when the end of 1111 a component is finished (Sequences, Alignments, etc) 1112 Args : Optional new value 1113 Returns : Value of _flush_on_write 1114 1115=cut 1116 1117sub _flush_on_write { 1118 my ($self, $value) = @_; 1119 if (defined $value) { 1120 $self->{'_flush_on_write'} = $value; 1121 } 1122 return $self->{'_flush_on_write'}; 1123} 1124 1125 1126=head2 save_tempfiles 1127 1128 Title : save_tempfiles 1129 Usage : $io->save_tempfiles(1) 1130 Function: Boolean flag to indicate whether to retain tempfiles/tempdir 1131 Args : Value evaluating to TRUE or FALSE 1132 Returns : Boolean value : 1 = save tempfiles/tempdirs, 0 = remove (default) 1133 1134=cut 1135 1136sub save_tempfiles { 1137 my $self = shift; 1138 if (@_) { 1139 my $value = shift; 1140 $self->{save_tempfiles} = $value ? 1 : 0; 1141 } 1142 return $self->{save_tempfiles} || 0; 1143} 1144 1145 11461; 1147