1# Copyright (c) 2004-2009 Timothy Appnel
2# http://appnel.com/
3# This code is released under the Artistic License.
4#
5# XML::RAI - RSS Abstraction Interface.
6#
7
8package XML::RAI;
9
10use strict;
11
12use vars qw($VERSION);
13$VERSION = 1.3031;
14
15use XML::RSS::Parser 4.0;
16use XML::RAI::Channel;
17use XML::RAI::Item;
18use XML::RAI::Image;
19
# Timestamp templates accepted by time_format(). W3CDTF and RFC822 are
# strftime-style format strings; PASS_THRU and EPOCH are sentinel values
# (per the POD below: "do not normalize" and "seconds since the epoch").
# NOTE(review): the code that renders these templates lives outside this
# file -- presumably Date::Format, which is listed under DEPENDENCIES.
use constant W3CDTF    => '%Y-%m-%dT%H:%M:%S%z';    # AKA...
use constant RFC8601   => W3CDTF;

# The year field must be %Y (calendar year). %G is the ISO 8601 week-based
# year in POSIX strftime and the GPS week number in Date::Format, so it
# yields the wrong value for an RFC 822 date.
use constant RFC822    => '%a, %d %b %Y %T %Z';
use constant PASS_THRU => '';
use constant EPOCH     => 'EPOCH';
25
# Constructor. Blesses an empty hash into the invoking class and hands all
# remaining arguments to init(), which accepts either a parsed feed object
# or a parser method name followed by that method's arguments.
# Returns the new instance.
sub new {
    my ($class, @args) = @_;
    my $self = bless {}, $class;
    $self->init(@args);
    return $self;
}
32
# Populates a freshly blessed instance (called by new()).
#
# Arguments are either:
#   * an XML::RSS::Parser::Feed object (or an instance of a subclass), used
#     directly as the source document; or
#   * the name of an XML::RSS::Parser method followed by that method's
#     arguments, in which case a new parser is created and the method is
#     invoked to produce the document.
#
# Dies with the parser's errstr if the parser method returns false, and
# dies if no arguments are supplied at all.
#
# Stores the document, an XML::RAI::Channel wrapper, an array ref of
# XML::RAI::Item wrappers, an optional XML::RAI::Image wrapper, and the
# default time format (W3CDTF) on the instance. The value of the final
# assignment is the implicit return value; new() ignores it.
sub init {
    my $self = shift;
    require Scalar::Util;    # core module; loaded here for blessed()
    my $doc;
    if (Scalar::Util::blessed($_[0])
        && $_[0]->isa('XML::RSS::Parser::Feed'))
    {

        # isa() rather than an exact class-name comparison, so subclasses
        # of the feed class are not mistaken for a method name.
        $doc = shift;
    }
    else {
        my ($method, @r) = @_;
        die 'XML::RAI requires a feed object or a parser method name'
          unless defined $method && length $method;
        my $parser = XML::RSS::Parser->new;
        $doc = $parser->$method(@r) or die $parser->errstr;
    }
    $self->{__doc} = $doc;
    my $channel = $self->{__channel} =
      XML::RAI::Channel->new($doc->channel, $self);
    my @items = map { XML::RAI::Item->new($_, $channel) } $doc->items;
    $self->{__items} = \@items;

    # A feed may carry several image elements (seen in the wild, e.g.
    # slashdot); only the first is wrapped. The list fetched here is reused
    # for the existence test instead of querying the document a second time.
    my @imgs = $doc->image;
    $self->{__image} = XML::RAI::Image->new($imgs[0], $channel)
      if @imgs;
    $self->{__timef} = W3CDTF;
}
54
# Combined getter/setter for the timestamp output format.
# When a defined value is passed (the empty string counts as defined) it
# replaces the stored format. Always returns the format now in effect.
sub time_format {
    my ($self, $format) = @_;
    if (defined $format) {
        $self->{__timef} = $format;
    }
    return $self->{__timef};
}
59
# Class method. Dispatches on the type of the first argument: a GLOB
# reference is treated as a filehandle and routed to parse_file; anything
# else is treated as a string and routed to parse_string. All arguments
# are forwarded unchanged and the chosen method's result is returned.
sub parse {
    my $class = shift;
    my $via = ref($_[0]) eq 'GLOB' ? 'parse_file' : 'parse_string';
    return $class->$via(@_);
}
69
# Class method. Builds an instance by asking new() to run the parser's
# parse_file method with the supplied arguments, and returns that instance.
# NOTE(review): no errstr method is defined in this file, and new() as
# written here always returns the object or dies, so the guard below
# presumably only matters to subclasses -- confirm.
sub parsefile {
    my ($class, @source) = @_;
    my $rai = $class->new('parse_file', @source) or die $class->errstr;
    return $rai;
}

# parse_file is the same routine under its underscore spelling.
*parse_file = \&parsefile;
75
# Class method. Builds an instance by asking new() to run the parser's
# parse_string method with the supplied arguments, and returns that
# instance.
# NOTE(review): errstr is not defined in this file; the guard presumably
# only matters when a subclass's new() can return false -- confirm.
sub parse_string {
    my ($class, @source) = @_;
    my $rai = $class->new('parse_string', @source) or die $class->errstr;
    return $rai;
}
80
# Class method. Builds an instance by asking new() to run the parser's
# parse_uri method with the supplied arguments, and returns that instance.
# NOTE(review): errstr is not defined in this file; the guard presumably
# only matters when a subclass's new() can return false -- confirm.
sub parse_uri {
    my ($class, @source) = @_;
    my $rai = $class->new('parse_uri', @source) or die $class->errstr;
    return $rai;
}
85
# Read-only accessors for the state that init() stores on the instance.

# The source document the instance was built from.
sub document {
    my ($self) = @_;
    return $self->{__doc};
}

# The channel wrapper object.
sub channel {
    my ($self) = @_;
    return $self->{__channel};
}

# Array reference holding the item wrapper objects.
sub items {
    my ($self) = @_;
    return $self->{__items};
}

# Number of entries in the items array.
sub item_count {
    my ($self) = @_;
    return scalar @{ $self->{__items} };
}

# The image wrapper object; undef when none was stored.
sub image {
    my ($self) = @_;
    return $self->{__image};
}
91
921;
93
94__END__
95
96=begin
97
98=head1 NAME
99
100XML::RAI - RSS Abstraction Interface.
101
102=head1 SYNOPSIS
103
104 #!/usr/bin/perl -w
105 use strict;
106 use XML::RAI;
107 my $doc = <<DOC;
108 <?xml version="1.0" encoding="iso-8859-1"?>
109 <rss xmlns:dc="http://purl.org/dc/elements/1.1/"
110     xmlns="http://purl.org/rss/1.0/">
111     <channel>
112         <title>tima thinking outloud</title>
113         <link>http://www.timaoutloud.org/</link>
114         <description></description>
115         <dc:language>en-us</dc:language>
116         <item>
117             <title>His and Hers Weblogs.</title>
118             <description>First it was his and hers Powerbooks. Now
119             its weblogs. There goes the neighborhood.</description>
120             <link>http://www.timaoutloud.org/archives/000338.html</link>
121             <dc:subject>Musings</dc:subject>
122             <dc:creator>tima</dc:creator>
123             <dc:date>2004-01-23T12:33:22-05:00</dc:date>
124         </item>
125         <item>
126             <title>Commercial Music Again.</title>
127             <description>Last year I made a post about music used
128             in TV commercials that I recognized and have been listening to.
129             For all the posts I made about technology and other bits of sagely
130             wisdom the one on commercial music got the most traffic of any
131             each month. I need a new top post. Here are some more tunes that
132             have appeared in commercials.</description>
133             <guid isPermaLink="true">
134               http://www.timaoutloud.org/archives/000337.html
135             </guid>
136             <category>Musings</category>
137             <author>tima</author>
138             <pubDate>Sun, 18 Jan 2004 14:09:03 GMT</pubDate>
139         </item>
140     </channel>
141 </rss>
142 DOC
143
144 # The above is to demonstrate the value of RAI. It is not any
145 # specific RSS format, nor does it exercise best practices.
146
147 my $rai = XML::RAI->parse_string($doc);
148 print $rai->channel->title."\n\n";
149 foreach my $item ( @{$rai->items} ) {
150    print $item->title."\n";
151    print $item->link."\n";
152    print $item->content."\n";
153    print $item->issued."\n\n";
154 }
155
156=head1 DESCRIPTION
157
158The RSS Abstraction Interface, or RAI (said "ray"), provides an
159object-oriented interface to XML::RSS::Parser trees that abstracts
160the user from handling namespaces, overlapping and alternate tag
161mappings.
162
163It's rather well known that, while popular, the RSS syntax is a bit
164of a mess. Anyone who has attempted to write software that consumes
165RSS feeds "in the wild" can attest to the headaches in handling the
166many formats and interpretations that are in use. For instance, in
167"The myth of RSS compatibility"
168L<http://diveintomark.org/archives/2004/02/04/incompatible-rss>
Mark Pilgrim identifies 9 different versions of RSS (there are 10
actually[1]), and that is without going into tags with
overlapping purposes. Even the acronym RSS has multiple, though
similar, meanings.
173
174The L<XML::RSS::Parser> alone attempts to help developers cope with these
175issues through a liberal interpretation of what is RSS and routines
176to normalize the parse tree into a more common and manageable form.
177
178RAI takes this one step further. Its intent is to give a developer
179the means to not have to care about what tags the feed uses to
180present its meta data.
181
182RAI provides a single simplified interface that maps one method
183call to various overlapping and alternate tags used in RSS feeds.
184The interface also abstracts developers from needing to deal with
185namespaces. Method names are based on Dublin Core terminology.
186
187With the release of version 1.0, the L<XML::RSS::Parser>
188distribution was folded into XML::RAI.
189
190[1] When initially released, RSS 2.0 had a namespace. When it was
191reported a few days later that some XSLT-based systems were
192breaking because of the change in the RSS namespace from "" (none)
193to http://backend.userland.com/rss2, the namespace was removed, but
194the version number was not incremented making it incompatible with
195itself. L<http://groups.yahoo.com/group/rss-dev/message/4113> This
196version was not counted in Mark's post.
197
198=head1 METHODS
199
200=item XML::RAI->new($rss_tree)
201
202Returns a populated RAI instance based on the
203L<XML::RSS::Parser::Feed> object passed in.
204
205=item XML::RAI->parse($string_or_file_handle)
206
Passes the string or file handle through to either C<parse_file>
(for a file handle) or C<parse_string> (for a string) in
L<XML::RSS::Parser>. Returns a populated RAI instance.

To maintain backwards compatibility this method is B<not> inherited
from the underlying SAX implementation.
213
214=item XML::RAI->parse_file
215
216=item XML::RAI->parse_string
217
218=item XML::RAI->parse_uri
219
A pass-thru to the underlying SAX implementation. See L<XML::SAX::Base> for
more on these methods.
222
223=item $rai->document
224
Returns the L<XML::RSS::Parser> parse tree being used as the source
for the RAI object.
227
228=item $rai->channel
229
230Returns the L<XML::RAI::Channel> object.
231
232=item $rai->items
233
Returns an array reference containing the L<XML::RAI::Item> objects
for the feed.
236
237=item $rai->item_count
238
239Returns the number of items as an integer.
240
241=item $rai->image
242
243Returns the L<XML::RAI::Image> object, if any. (Many feeds do not
244have an image block.)
245
246=item $rai->time_format($timef)
247
Sets the timestamp normalization format. RAI will attempt to parse
each timestamp string into a date value and will output timestamp
(date) values in this format.
251
252RAI implements a few constants with common RSS timestamp formatting
253strings:
254
 W3CDTF     1999-09-01T22:10:40Z
 RFC8601    (alias for W3CDTF, a profile of ISO 8601)
 RFC822     Wed, 01 Sep 1999 22:10:40 GMT
 EPOCH      (Seconds since system epoch.)
 PASS_THRU  (timestamp as it appears in the source. does not normalize.)
260
261W3CDTF/RFC8601 is the default. For more detail on creating your own
262timestamp formats see the manpage for the C<strftime> command.
263
264=head1 PLUGINS
265
With the introduction of the C<add_mapping> and
C<register_ns_prefix> methods in the underlying
L<XML::RSS::Parser>, RAI now has a plugin API for easily
extending its mappings.
270
271To create a RAI plugin module, simply create a package with
272an C<import> method that makes all of the necessary
273C<add_mapping> and C<register_ns_prefix> calls. For an
274example plugin module see L<XML::RAI::TrackBack>
275
276=head1 DEPENDENCIES
277
278L<XML::RSS::Parser> 4.0, L<Date::Parse> 2.26, L<Date::Format> 2.22
279
280=head1 TO DO
281
282=over
283
284=item * Add Atom elements into mappings.
285
286=item * Serialization module(s).
287
288=item * DATETIME (L<DateTime> object) constants and functionality
289for C<time_format>.
290
291=back
292
293=head1 PARTICIPATION
294
295I welcome and accept patches in diff format. If you wish to
296hack on this code, please fork the git repository found at:
297L<http://github.com/tima/perl-xml-rai/>
298
299If you have something to push back to my repository, just
300use the "pull request" button on the github site.
301
302=head1 LICENSE
303
304The software is released under the Artistic License. The terms of
305the Artistic License are described at
306L<http://www.perl.com/language/misc/Artistic.html>.
307
308=head1 AUTHOR & COPYRIGHT
309
310Except where otherwise noted, XML::RAI is Copyright
3112003-2009, Timothy Appnel, tima@cpan.org. All rights
312reserved.
313
314=cut
315
316=end
317