1package Bio::Phylo::Parsers::Abstract; 2use strict; 3use warnings; 4use base 'Bio::Phylo::IO'; 5use IO::Handle; 6use Bio::Phylo::Util::Exceptions 'throw'; 7use Bio::Phylo::Util::CONSTANT '/looks_like/'; 8use Bio::Phylo::Util::Logger ':simple'; 9use Bio::Phylo::Factory; 10 11=head1 NAME 12 13Bio::Phylo::Parsers::Abstract - Superclass for parsers used by Bio::Phylo::IO 14 15=head1 DESCRIPTION 16 17This package is subclassed by all other packages within Bio::Phylo::Parsers::.*. 18There is no direct usage. 19 20=cut 21 22my $factory = Bio::Phylo::Factory->new; 23my $logger = Bio::Phylo::Util::Logger->new; 24 25# argument is a file name, which we open 26sub _open_file { 27 my $file_name = shift; 28 my $encoding = shift || ''; 29 open my $handle, "<${encoding}", $file_name or throw 'FileError' => $!; 30 return $handle; 31} 32 33# argument is a string, which, at perl version >5.8, 34# we can treat as a handle by opening it by reference 35sub _open_string { 36 my $string_value = shift; 37 my $encoding = shift || ''; 38 open my $handle, "<${encoding}", \$string_value or throw 'FileError' => $!; 39 return $handle; 40} 41 42# argument is a url, 43sub _open_url { 44 my $url = shift; 45 my $encoding = shift || ''; 46 my $handle; 47 48 # we need to use LWP::UserAgent to fetch the resource, but 49 # we don't "use" it at the top of the module because that 50 # would make it a required dependency 51 eval { require LWP::UserAgent }; 52 if ($@) { 53 throw 'ExtensionError' => 54 "Providing a -url argument requires\nsuccesful loading " 55 . "of LWP::UserAgent.\nHowever, there was an error when " 56 . "I\ntried that:\n" 57 . $@; 58 } 59 60 # apparently it's installed, so let's instantiate a client 61 my $ua = LWP::UserAgent->new; 62 $ua->timeout(10); 63 $ua->env_proxy; 64 65 # fetch the resource, get an HTTP::Response object 66 my $response = $ua->get($url); 67 68 # i.e. 200, or 304 (unchanged cache) 69 if ( $response->is_success or $response->status_line =~ /304/ ) { 70 71 # content is a string, so we create a handle in the same way 72 # as when the argument was a string 73 $handle = _open_string( $response->content, $encoding ); 74 } 75 else { 76 throw 'NetworkError' => $response->status_line; 77 } 78 return $handle; 79} 80 81# deal with all possible data sources, return 82# a handle to whatever it is or throw an exception 83sub _open_handle { 84 my %args = @_; 85 my $handle; 86 if ( $args{'-handle'} ) { 87 binmode $args{'-handle'}, ":utf8"; 88 $handle = $args{'-handle'}; 89 } 90 elsif ( $args{'-file'} ) { 91 $handle = _open_file( $args{'-file'}, $args{'-encoding'} ); 92 } 93 elsif ( $args{'-string'} ) { 94 $handle = _open_string( $args{'-string'}, $args{'-encoding'} ); 95 } 96 elsif ( $args{'-url'} ) { 97 $handle = _open_url( $args{'-url'}, $args{'-encoding'} ); 98 } 99 else { 100 throw 'BadArgs' => 'No data source provided!'; 101 } 102 103 # check to see if the data source contains anything 104 #if ( eof $handle ) { 105 # throw 'NoData' => "Source is empty!"; 106 #} 107 return $handle; 108} 109 110# open a Bio::Phylo::Project if asked (if the -as_project flag 111# was provided.) If the user has supplied one (the -project flag) 112# simply return that or undefined otherwise. 113sub _open_project { 114 my ( $fac, %args ) = @_; 115 if ( $args{'-project'} ) { 116 return $args{'-project'}; 117 } 118 elsif ( $args{'-as_project'} ) { 119 return $fac->create_project; 120 } 121 else { 122 return undef; 123 } 124} 125 126# this constructor is called by the Bio::Phylo::IO::parse 127# subroutine 128sub _new { 129 my $class = shift; 130 my %args = looks_like_hash @_; 131 132 # we need to guess the format 133 if ( $class eq __PACKAGE__ ) { 134 if ( my $format = _guess_format(_open_handle(%args)) ) { 135 $class = 'Bio::Phylo::Parsers::' . ucfirst($format); 136 return looks_like_class($class)->_new(%args); 137 } 138 else { 139 throw 'BadArgs' => "No format specified and unable to guess!"; 140 } 141 } 142 143 # factory is either user supplied or a private static 144 my $fac = $args{'-factory'} || $factory; 145 146 # values of these object fields will be accessed 147 # by child classes through the appropriate protected 148 # getters 149 return bless { 150 '_fac' => $fac, 151 '_handle' => _open_handle(%args), 152 '_proj' => _open_project( $fac, %args ), 153 '_args' => \%args, # for child-specific arguments 154 '_encoding' => $args{'-encoding'}, 155 '_handlers' => $args{'-handlers'}, 156 '_flush' => $args{'-flush'}, 157 }, $class; 158} 159 160# child classes can override this to specify 161# that their return value is a single scalar 162# (e.g. a tree block, as is the case for newick), 163# instead of an array of blocks 164sub _return_is_scalar { 0 } 165 166# this is called by Bio::Phylo::IO::parse, and 167# in turn it calls the _parse method of whatever 168# the concrete child instance is. 169sub _process { 170 my $self = shift; 171 if ( $self->_return_is_scalar ) { 172 my $result = $self->_parse; 173 if ( my $p = $self->_project ) { 174 if ( my $meta = $self->_project_meta ) { 175 $p->add_meta($_) for @{ $meta }; 176 } 177 return $p->insert($result); 178 } 179 else { 180 return $result; 181 } 182 } 183 else { 184 my @result = $self->_parse; 185 if ( my $p = $self->_project ) { 186 if ( my $meta = $self->_project_meta ) { 187 $p->add_meta($_) for @{ $meta }; 188 } 189 return $p->insert(@result); 190 } 191 else { 192 return [@result]; 193 } 194 } 195} 196 197# once this is called, the handle will have read to 198# the end of the stream, so it needs to be rewound 199# if we want to read from the top 200sub _string { 201 my $self = shift; 202 my $handle = $self->_handle; 203 my $string = do { local $/; <$handle> }; 204 return $string; 205} 206sub _project_meta {}; 207sub _logger { $logger } 208sub _project { shift->{'_proj'} } 209sub _handle { shift->{'_handle'} } 210sub _factory { shift->{'_fac'} } 211sub _args { shift->{'_args'} } 212sub _encoding { shift->{'_encoding'} } 213sub _flush { shift->{'_flush'} } 214sub _handlers { 215 my ( $self, $type ) = @_; 216 if ( my $h = $self->{'_handlers'} ) { 217 return defined $type ? $h->{$type} : $h; 218 } 219} 220 221sub _guess_format { 222 my $handle = shift; 223 my $line = $handle->getline; 224 my $format; 225 if ( $line =~ /^#nexus/i ) { 226 $format = 'nexus'; 227 } 228 elsif ( $line =~ /^<[^>]*nexml/ ) { 229 $format = 'nexml'; 230 } 231 elsif ( $line =~ /^<[^>]*phyloxml/ ) { 232 $format = 'phyloxml'; 233 } 234 elsif ( $line =~ /^\s*\d+\s+\d+\s*$/ ) { 235 $format = 'phylip'; 236 } 237 elsif ( $line =~ /^>/ ) { 238 $format = 'fasta'; 239 } 240 elsif ( $line =~ /^\@/ ) { 241 $format = 'fastq'; 242 } 243 elsif ( $line =~ /^\s*\(/ ) { 244 $format = 'newick'; 245 if ( $line =~ /{/ ) { 246 $format = 'figtree'; 247 } 248 } 249 elsif ( $line =~ /<\? xml/ ) { 250 $line = $handle; 251 if ( $line =~ /^<[^>]*nexml/ ) { 252 $format = 'nexml'; 253 } 254 elsif ( $line =~ /^<[^>]*phyloxml/ ) { 255 $format = 'phyloxml'; 256 } 257 } 258 seek( $handle, 0, 0 ); 259 return $format; 260} 261 262# podinherit_insert_token 263 264=head1 SEE ALSO 265 266There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> 267for any user or developer questions and discussions. 268 269=over 270 271=item L<Bio::Phylo::IO> 272 273The parsers are called by the L<Bio::Phylo::IO> object. 274Look there for examples. 275 276=item L<Bio::Phylo::Manual> 277 278Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>. 279 280=back 281 282=head1 CITATION 283 284If you use Bio::Phylo in published research, please cite it: 285 286B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> 287and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. 288I<BMC Bioinformatics> B<12>:63. 289L<http://dx.doi.org/10.1186/1471-2105-12-63> 290 291=cut 292 2931; 294