1package Bio::Phylo::Parsers::Abstract;
2use strict;
3use warnings;
4use base 'Bio::Phylo::IO';
5use IO::Handle;
6use Bio::Phylo::Util::Exceptions 'throw';
7use Bio::Phylo::Util::CONSTANT '/looks_like/';
8use Bio::Phylo::Util::Logger ':simple';
9use Bio::Phylo::Factory;
10
11=head1 NAME
12
13Bio::Phylo::Parsers::Abstract - Superclass for parsers used by Bio::Phylo::IO
14
15=head1 DESCRIPTION
16
17This package is subclassed by all other packages within Bio::Phylo::Parsers::.*.
18There is no direct usage.
19
20=cut
21
22my $factory = Bio::Phylo::Factory->new;
23my $logger  = Bio::Phylo::Util::Logger->new;
24
25# argument is a file name, which we open
26sub _open_file {
27    my $file_name = shift;
28    my $encoding  = shift || '';
29    open my $handle, "<${encoding}", $file_name or throw 'FileError' => $!;
30    return $handle;
31}
32
33# argument is a string, which, at perl version >5.8,
34# we can treat as a handle by opening it by reference
35sub _open_string {
36    my $string_value = shift;
37    my $encoding     = shift || '';
38    open my $handle, "<${encoding}", \$string_value or throw 'FileError' => $!;
39    return $handle;
40}
41
42# argument is a url,
43sub _open_url {
44    my $url = shift;
45    my $encoding = shift || '';
46    my $handle;
47
48    # we need to use LWP::UserAgent to fetch the resource, but
49    # we don't "use" it at the top of the module because that
50    # would make it a required dependency
51    eval { require LWP::UserAgent };
52    if ($@) {
53        throw 'ExtensionError' =>
54            "Providing a -url argument requires\nsuccesful loading "
55          . "of LWP::UserAgent.\nHowever, there was an error when "
56          . "I\ntried that:\n"
57          . $@;
58    }
59
60    # apparently it's installed, so let's instantiate a client
61    my $ua = LWP::UserAgent->new;
62    $ua->timeout(10);
63    $ua->env_proxy;
64
65    # fetch the resource, get an HTTP::Response object
66    my $response = $ua->get($url);
67
68    # i.e. 200, or 304 (unchanged cache)
69    if ( $response->is_success or $response->status_line =~ /304/ ) {
70
71        # content is a string, so we create a handle in the same way
72        # as when the argument was a string
73        $handle = _open_string( $response->content, $encoding );
74    }
75    else {
76        throw 'NetworkError' => $response->status_line;
77    }
78    return $handle;
79}
80
81# deal with all possible data sources, return
82# a handle to whatever it is or throw an exception
83sub _open_handle {
84    my %args = @_;
85    my $handle;
86    if ( $args{'-handle'} ) {
87        binmode $args{'-handle'}, ":utf8";
88        $handle = $args{'-handle'};
89    }
90    elsif ( $args{'-file'} ) {
91        $handle = _open_file( $args{'-file'}, $args{'-encoding'} );
92    }
93    elsif ( $args{'-string'} ) {
94        $handle = _open_string( $args{'-string'}, $args{'-encoding'} );
95    }
96    elsif ( $args{'-url'} ) {
97        $handle = _open_url( $args{'-url'}, $args{'-encoding'} );
98    }
99    else {
100        throw 'BadArgs' => 'No data source provided!';
101    }
102
103    # check to see if the data source contains anything
104    #if ( eof $handle ) {
105    #    throw 'NoData' => "Source is empty!";
106    #}
107    return $handle;
108}
109
110# open a Bio::Phylo::Project if asked (if the -as_project flag
111# was provided.) If the user has supplied one (the -project flag)
112# simply return that or undefined otherwise.
113sub _open_project {
114    my ( $fac, %args ) = @_;
115    if ( $args{'-project'} ) {
116        return $args{'-project'};
117    }
118    elsif ( $args{'-as_project'} ) {
119        return $fac->create_project;
120    }
121    else {
122        return undef;
123    }
124}
125
126# this constructor is called by the Bio::Phylo::IO::parse
127# subroutine
128sub _new {
129    my $class = shift;
130    my %args  = looks_like_hash @_;
131
132    # we need to guess the format
133    if ( $class eq __PACKAGE__ ) {
134        if ( my $format = _guess_format(_open_handle(%args)) ) {
135            $class = 'Bio::Phylo::Parsers::' . ucfirst($format);
136            return looks_like_class($class)->_new(%args);
137        }
138        else {
139            throw 'BadArgs' => "No format specified and unable to guess!";
140        }
141    }
142
143    # factory is either user supplied or a private static
144    my $fac = $args{'-factory'} || $factory;
145
146    # values of these object fields will be accessed
147    # by child classes through the appropriate protected
148    # getters
149    return bless {
150        '_fac'      => $fac,
151        '_handle'   => _open_handle(%args),
152        '_proj'     => _open_project( $fac, %args ),
153        '_args'     => \%args, # for child-specific arguments
154        '_encoding' => $args{'-encoding'},
155        '_handlers' => $args{'-handlers'},
156        '_flush'    => $args{'-flush'},
157    }, $class;
158}
159
160# child classes can override this to specify
161# that their return value is a single scalar
162# (e.g. a tree block, as is the case for newick),
163# instead of an array of blocks
164sub _return_is_scalar { 0 }
165
166# this is called by Bio::Phylo::IO::parse, and
167# in turn it calls the _parse method of whatever
168# the concrete child instance is.
169sub _process {
170    my $self = shift;
171    if ( $self->_return_is_scalar ) {
172        my $result = $self->_parse;
173        if ( my $p = $self->_project ) {
174        	if ( my $meta = $self->_project_meta ) {
175        		$p->add_meta($_) for @{ $meta };
176        	}
177            return $p->insert($result);
178        }
179        else {
180            return $result;
181        }
182    }
183    else {
184        my @result = $self->_parse;
185        if ( my $p = $self->_project ) {
186        	if ( my $meta = $self->_project_meta ) {
187        		$p->add_meta($_) for @{ $meta };
188        	}
189            return $p->insert(@result);
190        }
191        else {
192            return [@result];
193        }
194    }
195}
196
197# once this is called, the handle will have read to
198# the end of the stream, so it needs to be rewound
199# if we want to read from the top
200sub _string {
201    my $self   = shift;
202    my $handle = $self->_handle;
203    my $string = do { local $/; <$handle> };
204    return $string;
205}
206sub _project_meta {};
207sub _logger   { $logger }
208sub _project  { shift->{'_proj'} }
209sub _handle   { shift->{'_handle'} }
210sub _factory  { shift->{'_fac'} }
211sub _args     { shift->{'_args'} }
212sub _encoding { shift->{'_encoding'} }
213sub _flush    { shift->{'_flush'} }
214sub _handlers {
215    my ( $self, $type ) = @_;
216    if ( my $h = $self->{'_handlers'} ) {
217        return defined $type ? $h->{$type} : $h;
218    }
219}
220
221sub _guess_format {
222    my $handle = shift;
223    my $line = $handle->getline;
224    my $format;
225    if ( $line =~ /^#nexus/i ) {
226        $format = 'nexus';
227    }
228    elsif ( $line =~ /^<[^>]*nexml/ ) {
229        $format = 'nexml';
230    }
231    elsif ( $line =~ /^<[^>]*phyloxml/ ) {
232        $format = 'phyloxml';
233    }
234    elsif ( $line =~ /^\s*\d+\s+\d+\s*$/ ) {
235        $format = 'phylip';
236    }
237    elsif ( $line =~ /^>/ ) {
238        $format = 'fasta';
239    }
240    elsif ( $line =~ /^\@/ ) {
241        $format = 'fastq';
242    }
243    elsif ( $line =~ /^\s*\(/ ) {
244        $format = 'newick';
245        if ( $line =~ /{/ ) {
246            $format = 'figtree';
247        }
248    }
249    elsif ( $line =~ /<\? xml/ ) {
250        $line = $handle;
251        if ( $line =~ /^<[^>]*nexml/ ) {
252            $format = 'nexml';
253        }
254        elsif ( $line =~ /^<[^>]*phyloxml/ ) {
255            $format = 'phyloxml';
256        }
257    }
258    seek( $handle, 0, 0 );
259    return $format;
260}
261
262# podinherit_insert_token
263
264=head1 SEE ALSO
265
266There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
267for any user or developer questions and discussions.
268
269=over
270
271=item L<Bio::Phylo::IO>
272
273The parsers are called by the L<Bio::Phylo::IO> object.
274Look there for examples.
275
276=item L<Bio::Phylo::Manual>
277
278Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
279
280=back
281
282=head1 CITATION
283
284If you use Bio::Phylo in published research, please cite it:
285
286B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
287and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
288I<BMC Bioinformatics> B<12>:63.
289L<http://dx.doi.org/10.1186/1471-2105-12-63>
290
291=cut
292
2931;
294