1# $Id: IMDB.pm 7370 2012-04-09 01:17:33Z chris $ 2 3=head1 NAME 4 5WebService::IMDB - OO Perl interface to the Internet Movie Database imdb.com 6 7 8=head1 SYNOPSIS 9 10 11 use WebService::IMDB; 12 13 my $ws = WebService::IMDB->new(cache => 1, cache_exp => "12h"); 14 15 my $movie = $ws->search(type => "Title", tconst => "tt0114814"); 16 17 print $movie->title(), ": \n\n"; 18 print $movie->synopsis(), "\n\n"; 19 20 foreach ( @{$movie->cast_summary()} ) { 21 print $_->name()->name(), " : ", $_->char(), "\n"; 22 } 23 24 25=head1 LEGAL 26 27The data accessed via this API is provided by IMDB, and is currently supplied 28with the following copyright notice. 29 30=over 4 31 32For use only by clients authorized in writing by IMDb. Authors and users of unauthorized clients accept full legal exposure/liability for their actions. 33 34=back 35 36Anyone using WebService::IMDB must abide by the above requirements. 37 38=cut 39 40package WebService::IMDB; 41 42use strict; 43use warnings; 44 45our $VERSION = '0.05'; 46 47use base qw(Class::Accessor); 48 49use Cache::FileCache; 50 51use Carp; 52 53use File::Spec::Functions qw(tmpdir); 54 55use HTTP::Request::Common; 56 57use JSON; 58 59use LWP::ConnCache; 60use LWP::UserAgent; 61 62use WebService::IMDB::Title; 63use WebService::IMDB::Name; 64 65use URI; 66 67__PACKAGE__->mk_accessors(qw( 68 _cache 69 _cache_exp 70 _cache_root 71 _cache_obj 72 _domain 73 _useragent 74)); 75 76 77=head1 METHODS 78 79=head2 new(%opts) 80 81Constructor. 82 83%opts can contain: 84 85=over 4 86 87=item cache - Whether to cache responses. Defaults to true 88 89=item cache_root - The root dir for the cache. Defaults to tmpdir(); 90 91=item cache_exp - How long to cache responses for. Defaults to "1h" 92 93=item domain - Domain from which to request data. Defaults to "app.imdb.com" 94 95=back 96 97=cut 98 99sub new { 100 my $class = shift; 101 my %args = @_; 102 my $self = {}; 103 104 bless $self, $class; 105 106 $self->_cache_root($args{'cache_root'} || tmpdir()); 107 $self->_cache_exp($args{'cache_exp'} || "1h"); 108 $self->_cache(defined $args{'cache'} ? $args{'cache'} : 1); 109 110 $self->_domain($args{'domain'} || "app.imdb.com"); 111 112 if ($self->_cache()) { 113 $self->_cache_obj( Cache::FileCache->new( {'cache_root' => $self->_cache_root(), 'namespace' => "WebService-IMDB", 'default_expires_in' => $self->_cache_exp()} ) ); 114 } 115 116 $self->_useragent(LWP::UserAgent->new()); 117 $self->_useragent()->env_proxy(); 118 $self->_useragent()->agent($args{'agent'} || "WebService::IMDB/$VERSION"); 119 $self->_useragent()->conn_cache(LWP::ConnCache->new()); 120 $self->_useragent()->conn_cache()->total_capacity(3); 121 122 return $self; 123} 124 125 126=head2 search(%args) 127 128%args can contain: 129 130=over 4 131 132=item type - Resource type: "Title", "Name 133 134=item tconst - IMDB tconst e.g. "tt0000001" (Title) 135 136=item nconst - IMDB nconst e.g. "nm0000002" (Name) 137 138=item imdbid - More tolerant version of tconst, nconst e.g. "123", "0000456", "tt0000001", "nm0000002" (Title, Name) 139 140=back 141 142=cut 143 144sub search { 145 my $self = shift; 146 my $q = { @_ }; 147 148 if (!exists $q->{'type'}) { 149 croak "TODO: Return generic resultset"; 150 } elsif ($q->{'type'} eq "Title") { 151 delete $q->{'type'}; 152 return WebService::IMDB::Title->_new($self, $q); 153 } elsif ($q->{'type'} eq "Name") { 154 delete $q->{'type'}; 155 return WebService::IMDB::Name->_new($self, $q); 156 } else { 157 croak "Unknown resource type '" . $q->{'type'} . "'"; 158 } 159 160} 161 162sub copyright { 163 my $self = shift; 164 165 my $request = GET sprintf("http://app.imdb.com/title/tt0033467/maindetails?ts=%d", time()); # Crude, use timestamp in query string to bypass our own caching 166 167 return $self->_response_copyright($request); 168 169} 170 171sub _request_cache_key { 172 my $request = shift; 173 my $type = shift; 174 175 my $version = "0"; # Use a version number as the first part of the key to avoid collisions should we change the structure of the key later. 176 177 # Using | as field separator as this shouldn't ever appear in a URL (or any of the other fields). 178 my $cache_key = $version . "|" . $type . "|" . $request->method() . "|" . $request->uri(); 179 if ($request->method() eq "POST") { 180 $cache_key .= "|" . $request->content(); 181 } 182 183 return $cache_key; 184} 185 186sub _response { 187 my $self = shift; 188 my $request = shift; 189 my $cacheCodes = shift || {'404' => 1}; # Only cache 404 responses by default 190 191 my $cache_key = _request_cache_key($request, "RESPONSE"); 192 193 my $response; 194 195 if ($self->_cache()) { 196 $response = $self->_cache_obj()->get($cache_key); 197 } 198 199 if (!defined $response) { 200 $response = $self->_useragent()->request($request); 201 202 if ($self->_cache() && exists $cacheCodes->{$response->code()}) { 203 $self->_cache_obj()->set($cache_key, $response); 204 } 205 206 } 207 208 return $response; 209 210} 211 212sub _response_decoded_content { 213 my $self = shift; 214 my $request = shift; 215 216 my $saveToCache = shift; 217 if (!defined $saveToCache) { $saveToCache = 1; } 218 219 my $cache_key = _request_cache_key($request, "DECODED_CONTENT"); 220 221 my $content; 222 223 if ($self->_cache()) { 224 $content = $self->_cache_obj()->get($cache_key); 225 } 226 227 if (!defined $content) { 228 229 my $response = $self->_response($request); 230 231 if($response->code() ne "200") { 232 croak "URL (", $request->uri(), ") Request Failed - Code: ", $response->code(), " Error: ", $response->message(), "\n"; 233 } 234 235 $content = $response->decoded_content(); 236 237 if ($self->_cache() && $saveToCache) { 238 $self->_cache_obj()->set($cache_key, $content); 239 } 240 } 241 242 return $content; 243 244} 245 246sub _response_decoded_json { 247 my $self = shift; 248 my $request = shift; 249 250 my $content = $self->_response_decoded_content($request); 251 252 my $json = JSON->new(); 253 $json->utf8(0); 254 255 my $resp = $json->decode($content); 256 # TODO: Honour $resp->{'exp'}, and check $resp->{'copyright'} 257 258 if (exists $resp->{'error'}) { 259 croak $resp->{'error'}->{'status'} . " " . $resp->{'error'}->{'code'} . ": " . $resp->{'error'}->{'message'}; 260 } elsif (exists $resp->{'data'}) { 261 return $resp->{'data'}; 262 } elsif (exists $resp->{'news'}) { 263 return $resp->{'news'}; 264 } else { 265 croak "Failed to parse response"; 266 } 267 268} 269 270sub _response_copyright { 271 my $self = shift; 272 my $request = shift; 273 274 my $content = $self->_response_decoded_content($request); 275 276 my $json = JSON->new(); 277 $json->utf8(0); 278 279 my $resp = $json->decode($content); 280 281 return $resp->{'copyright'}; 282 283} 284 2851; 286