1# $Id: IMDB.pm 7370 2012-04-09 01:17:33Z chris $
2
3=head1 NAME
4
5WebService::IMDB - OO Perl interface to the Internet Movie Database imdb.com
6
7
8=head1 SYNOPSIS
9
10
11    use WebService::IMDB;
12
13    my $ws = WebService::IMDB->new(cache => 1, cache_exp => "12h");
14
15    my $movie = $ws->search(type => "Title", tconst => "tt0114814");
16
17    print $movie->title(), ": \n\n";
18    print $movie->synopsis(), "\n\n";
19
20    foreach ( @{$movie->cast_summary()} ) {
21        print $_->name()->name(), " : ", $_->char(), "\n";
22    }
23
24
25=head1 LEGAL
26
27The data accessed via this API is provided by IMDB, and is currently supplied
28with the following copyright notice.
29
30=over 4
31
32For use only by clients authorized in writing by IMDb.  Authors and users of unauthorized clients accept full legal exposure/liability for their actions.
33
34=back
35
36Anyone using WebService::IMDB must abide by the above requirements.
37
38=cut
39
40package WebService::IMDB;
41
42use strict;
43use warnings;
44
45our $VERSION = '0.05';
46
47use base qw(Class::Accessor);
48
49use Cache::FileCache;
50
51use Carp;
52
53use File::Spec::Functions qw(tmpdir);
54
55use HTTP::Request::Common;
56
57use JSON;
58
59use LWP::ConnCache;
60use LWP::UserAgent;
61
62use WebService::IMDB::Title;
63use WebService::IMDB::Name;
64
65use URI;
66
67__PACKAGE__->mk_accessors(qw(
68    _cache
69    _cache_exp
70    _cache_root
71    _cache_obj
72    _domain
73    _useragent
74));
75
76
77=head1 METHODS
78
79=head2 new(%opts)
80
81Constructor.
82
83%opts can contain:
84
85=over 4
86
87=item cache - Whether to cache responses.  Defaults to true
88
89=item cache_root - The root dir for the cache.  Defaults to tmpdir();
90
91=item cache_exp - How long to cache responses for.  Defaults to "1h"
92
93=item domain - Domain from which to request data.  Defaults to "app.imdb.com"
94
95=back
96
97=cut
98
99sub new {
100    my $class = shift;
101    my %args = @_;
102    my $self = {};
103
104    bless $self, $class;
105
106    $self->_cache_root($args{'cache_root'} || tmpdir());
107    $self->_cache_exp($args{'cache_exp'} || "1h");
108    $self->_cache(defined $args{'cache'} ? $args{'cache'} : 1);
109
110    $self->_domain($args{'domain'} || "app.imdb.com");
111
112    if ($self->_cache()) {
113	$self->_cache_obj( Cache::FileCache->new( {'cache_root' => $self->_cache_root(), 'namespace' => "WebService-IMDB", 'default_expires_in' => $self->_cache_exp()} ) );
114    }
115
116    $self->_useragent(LWP::UserAgent->new());
117    $self->_useragent()->env_proxy();
118    $self->_useragent()->agent($args{'agent'} || "WebService::IMDB/$VERSION");
119    $self->_useragent()->conn_cache(LWP::ConnCache->new());
120    $self->_useragent()->conn_cache()->total_capacity(3);
121
122    return $self;
123}
124
125
126=head2 search(%args)
127
128%args can contain:
129
130=over 4
131
132=item type - Resource type: "Title", "Name
133
134=item tconst - IMDB tconst e.g. "tt0000001" (Title)
135
136=item nconst - IMDB nconst e.g. "nm0000002" (Name)
137
138=item imdbid - More tolerant version of tconst, nconst e.g. "123", "0000456", "tt0000001", "nm0000002" (Title, Name)
139
140=back
141
142=cut
143
144sub search {
145    my $self = shift;
146    my $q = { @_ };
147
148    if (!exists $q->{'type'}) {
149	croak "TODO: Return generic resultset";
150    } elsif ($q->{'type'} eq "Title") {
151	delete $q->{'type'};
152	return WebService::IMDB::Title->_new($self, $q);
153    } elsif ($q->{'type'} eq "Name") {
154	delete $q->{'type'};
155	return WebService::IMDB::Name->_new($self, $q);
156    } else {
157	croak "Unknown resource type '" . $q->{'type'} . "'";
158    }
159
160}
161
162sub copyright {
163    my $self = shift;
164
165    my $request = GET sprintf("http://app.imdb.com/title/tt0033467/maindetails?ts=%d", time()); # Crude, use timestamp in query string to bypass our own caching
166
167    return $self->_response_copyright($request);
168
169}
170
171sub _request_cache_key {
172    my $request = shift;
173    my $type = shift;
174
175    my $version = "0"; # Use a version number as the first part of the key to avoid collisions should we change the structure of the key later.
176
177    # Using | as field separator as this shouldn't ever appear in a URL (or any of the other fields).
178    my $cache_key = $version . "|" . $type . "|" . $request->method() . "|" . $request->uri();
179    if ($request->method() eq "POST") {
180	$cache_key .= "|" . $request->content();
181    }
182
183    return $cache_key;
184}
185
186sub _response {
187    my $self = shift;
188    my $request = shift;
189    my $cacheCodes = shift || {'404' => 1}; # Only cache 404 responses by default
190
191    my $cache_key = _request_cache_key($request, "RESPONSE");
192
193    my $response;
194
195    if ($self->_cache()) {
196	$response = $self->_cache_obj()->get($cache_key);
197    }
198
199    if (!defined $response) {
200	$response = $self->_useragent()->request($request);
201
202	if ($self->_cache() && exists $cacheCodes->{$response->code()}) {
203	    $self->_cache_obj()->set($cache_key, $response);
204	}
205
206    }
207
208    return $response;
209
210}
211
212sub _response_decoded_content {
213    my $self = shift;
214    my $request = shift;
215
216    my $saveToCache = shift;
217    if (!defined $saveToCache) { $saveToCache = 1; }
218
219    my $cache_key = _request_cache_key($request, "DECODED_CONTENT");
220
221    my $content;
222
223    if ($self->_cache()) {
224	$content = $self->_cache_obj()->get($cache_key);
225    }
226
227    if (!defined $content) {
228
229	my $response = $self->_response($request);
230
231	if($response->code() ne "200") {
232	    croak "URL (", $request->uri(), ") Request Failed - Code: ", $response->code(), " Error: ", $response->message(), "\n";
233	}
234
235	$content = $response->decoded_content();
236
237	if ($self->_cache() && $saveToCache) {
238	    $self->_cache_obj()->set($cache_key, $content);
239	}
240    }
241
242    return $content;
243
244}
245
246sub _response_decoded_json {
247    my $self = shift;
248    my $request = shift;
249
250    my $content = $self->_response_decoded_content($request);
251
252    my $json = JSON->new();
253    $json->utf8(0);
254
255    my $resp = $json->decode($content);
256    # TODO: Honour $resp->{'exp'}, and check $resp->{'copyright'}
257
258    if (exists $resp->{'error'}) {
259	croak $resp->{'error'}->{'status'} . " " . $resp->{'error'}->{'code'} . ": " . $resp->{'error'}->{'message'};
260    } elsif (exists $resp->{'data'}) {
261	return $resp->{'data'};
262    } elsif (exists $resp->{'news'}) {
263	return $resp->{'news'};
264    } else {
265	croak "Failed to parse response";
266    }
267
268}
269
270sub _response_copyright {
271    my $self = shift;
272    my $request = shift;
273
274    my $content = $self->_response_decoded_content($request);
275
276    my $json = JSON->new();
277    $json->utf8(0);
278
279    my $resp = $json->decode($content);
280
281    return $resp->{'copyright'};
282
283}
284
2851;
286