1=head1 NAME 2 3IMDB::Persons - Perl extension for retrieving movies persons 4from IMDB.com 5 6=head1 SYNOPSIS 7 8 use IMDB::Persons; 9 10 # 11 # Retrieve a person information by IMDB code 12 # 13 my $person = new IMDB::Persons(crit => '0000129'); 14 15 or 16 17 # 18 # Retrieve a person information by name 19 # 20 my $person = new IMDB::Persons(crit => 'Tom Cruise'); 21 22 or 23 24 # 25 # Process already stored HTML page from IMDB 26 # 27 my $person = new IMDB::Persons(file => 'imdb.html'); 28 29 if($person->status) { 30 print "Name: ".$person->name."\n"; 31 print "Birth Date: ".$person->date_of_birth."\n"; 32 } else { 33 print "Something wrong: ".$person->error."!\n"; 34 } 35 36=head1 DESCRIPTION 37 38IMDB::Persons allows to retrieve an information about 39IMDB persons (actors, actresses, directors etc): full name, 40photo, date and place of birth, mini bio and filmography. 41 42=cut 43 44package IMDB::Persons; 45 46use strict; 47use warnings; 48 49use Carp; 50 51use Data::Dumper; 52 53use base qw(IMDB::BaseClass); 54 55use fields qw( _name 56 _date_of_birth 57 _place_of_birth 58 _photo 59 _mini_bio 60 _filmography_types 61 _filmography 62 _genres 63 _plot_keywords 64 ); 65 66use vars qw($VERSION %FIELDS); 67 68use constant FORCED => 1; 69use constant CLASS_NAME => 'IMDB::Persons'; 70use constant MAIN_TAG => 'h4'; 71 72BEGIN { 73 $VERSION = '0.53'; 74} 75 76{ 77 my %_defaults = ( 78 cache => 0, 79 debug => 0, 80 error => [], 81 matched => [], 82 cache_exp => '1 h', 83 host => 'www.imdb.com', 84 query => 'name/nm', 85 search => 'find?nm=on&mx=20&q=', 86 status => 0, 87 timeout => 10, 88 user_agent => 'Mozilla/5.0', 89 ); 90 91 sub _get_default_attrs { keys %_defaults } 92 sub _get_default_value { 93 my($self, $attr) = @_; 94 $_defaults{$attr}; 95 } 96} 97 98=head1 Object Private Methods 99 100=over 4 101 102=item _init() 103 104Initialize a new object. 105 106=cut 107 108sub _init { 109 my CLASS_NAME $self = shift; 110 my %args = @_; 111 112 croak "Person IMDB ID or Name should be defined!" if !$args{crit} && !$args{file}; 113 114 $self->SUPER::_init(%args); 115 my $name = $self->name(); 116 117 for my $prop (grep { /^_/ && !/^_name$/ } sort keys %FIELDS) { 118 ($prop) = $prop =~ /^_(.*)/; 119 $self->$prop(); 120 } 121} 122 123=item _search_person() 124 125Implements a logic to search IMDB persons by their names. 126 127=cut 128 129sub _search_person { 130 my CLASS_NAME $self = shift; 131 132 return $self->SUPER::_search_results('\/name\/nm(\d+)', '/a'); 133} 134 135sub fields { 136 my CLASS_NAME $self = shift; 137 return \%FIELDS; 138} 139 140 141=back 142 143=head1 Object Public Methods 144 145=over 4 146 147=item name() 148 149Retrieve a person full name 150 151 my $person_name = $person->name(); 152 153=cut 154 155sub name { 156 my CLASS_NAME $self = shift; 157 if(!defined $self->{'_name'}) { 158 my $parser = $self->_parser(FORCED); 159 160 $parser->get_tag('title'); 161 my $title = $parser->get_text(); 162 $title =~ s#\s*\-\s*IMDB##i; 163 164 $self->_show_message("Title=$title", 'DEBUG'); 165 166 # Check if we have some search results 167 my $no_matches = 1; 168 while(my $tag = $parser->get_tag('td')) { 169 if($tag->[1]->{class} && $tag->[1]->{class} eq 'media_strip_header') { 170 $no_matches = 0; 171 last; 172 } 173 } 174 175 if($title =~ /imdb\s+name\s+search/i && !$no_matches) { 176 $self->_show_message("Go to search page ...", 'DEBUG'); 177 $title = $self->_search_person(); 178 } 179 180 $title = '' if $title =~ /IMDb Name Search/i; 181 if($title) { 182 $self->status(1); 183 $self->retrieve_code($parser, 'http://www.imdb.com/name/nm(\d+)') unless $self->code; 184 } else { 185 $self->status(0); 186 $self->error('Not Found'); 187 } 188 189 $title =~ s/^imdb\s+\-\s+//i; 190 $self->{'_name'} = $title; 191 } 192 193 return $self->{'_name'}; 194} 195 196=item mini_bio() 197 198Returns a mini bio for specified IMDB person 199 200 my $mini_bio = $person->mini_bio(); 201 202=cut 203 204sub mini_bio { 205 my CLASS_NAME $self = shift; 206 if(!defined $self->{_mini_bio}) { 207 my $parser = $self->_parser(FORCED); 208 while(my $tag = $parser->get_tag('div') ) { 209 last if $tag->[1]->{class} && $tag->[1]->{class} eq 'infobar'; 210 } 211 212 my $tag = $parser->get_tag('p'); 213 $self->{'_mini_bio'} = $parser->get_trimmed_text('a'); 214 } 215 return $self->{'_mini_bio'}; 216} 217 218=item date_of_birth() 219 220Returns a date of birth of IMDB person in format 'day' 'month caption' 'year': 221 222 my $d_birth = $person->date_of_birth(); 223 224=cut 225 226#TODO: add date convertion in different formats. 227sub date_of_birth { 228 my CLASS_NAME $self = shift; 229 if(!defined $self->{'_date_of_birth'}) { 230 my $parser = $self->_parser(FORCED); 231 while(my $tag = $parser->get_tag(MAIN_TAG)) { 232 my $text = $parser->get_text; 233 last if $text =~ /^Born/i; 234 } 235 236 my $date = ''; 237 my $year = ''; 238 my $place = ''; 239 while(my $tag = $parser->get_tag()) { 240 last if $tag->[0] eq '/td'; 241 242 if($tag->[0] eq 'a') { 243 my $text = $parser->get_text(); 244 next unless $text; 245 246 SWITCH: for($tag->[1]->{href}) { 247 /birth_monthday/i && do { $date = $text; $date =~ s#(\w+)\s(\d+)#$2 $1#; last SWITCH; }; 248 /birth_year/i && do { $year = $text; last SWITCH; }; 249 /birth_place/i && do { $place = $text; last SWITCH; }; 250 } 251 } 252 } 253 254 $self->{'_date_of_birth'} = {date => "$date $year", place => $place}; 255 } 256 257 return $self->{'_date_of_birth'}{'date'}; 258} 259 260=item place_of_birth() 261 262Returns a name of place of the birth 263 264 my $place = $person->place_of_birth(); 265 266=cut 267 268sub place_of_birth { 269 my CLASS_NAME $self = shift; 270 return $self->{'_date_of_birth'}{'place'}; 271} 272 273=item photo() 274 275Return a path to the person's photo 276 277 my $photo = $person->photo(); 278 279=cut 280 281sub photo { 282 my CLASS_NAME $self = shift; 283 if(!defined $self->{'_photo'}) { 284 my $tag; 285 my $parser = $self->_parser(FORCED); 286 while($tag = $parser->get_tag('img')) { 287 if($tag->[1]->{alt} && $tag->[1]->{alt} eq $self->name . ' Picture') { 288 $self->{'_photo'} = $tag->[1]{src}; 289 last; 290 } 291 } 292 293 $self->{'_photo'} = 'No Photo' unless $self->{'_photo'}; 294 } 295 296 return $self->{'_photo'}; 297} 298 299=item filmography() 300 301Returns a person's filmography as a hash of arrays with following structure: 302 303 my $fg = $person->filmography(); 304 305 __DATA__ 306 $fg = { 307 'Section' => [ 308 { title => 'movie title', 309 role => 'person role', 310 year => 'year of movie production', 311 code => 'IMDB code of movie', 312 } 313 ]; 314 } 315 316The section can be In Development, Actor, Self, Thanks, Archive Footage, Producer etc. 317 318=cut 319 320sub filmography { 321 my CLASS_NAME $self = shift; 322 323 my $films; 324 if(!$self->{'_filmography'}) { 325 my $parser = $self->_parser(FORCED); 326 while(my $tag = $parser->get_tag('h2')) { 327 328 my $text = $parser->get_text; 329 last if $text && $text =~ /filmography/i; 330 } 331 332 my $key = 'Unknown'; 333 while(my $tag = $parser->get_tag()) { 334 335 last if $tag->[0] eq 'script'; # Netx section after filmography 336 337 if($tag->[0] eq 'h5') { 338 my $caption = $parser->get_trimmed_text('h5', '/a'); 339 340 $key = $caption if $caption; 341 $key =~ s/://; 342 343 $self->_show_message("FILMOGRAPHY: key=$key; caption=$caption; trimmed=".$parser->get_trimmed_text('h5', '/a'), 'DEBUG'); 344 } 345 346 if($tag->[0] eq 'a' && $tag->[1]->{href} && $tag->[1]{href} =~ m!title\/tt(\d+)!) { 347 my $title = $parser->get_text(); 348 my $text = $parser->get_trimmed_text('br', '/li'); 349 350 $self->_show_message("link: $title --> $text", 'DEBUG'); 351 352 my $code = $1; 353 my($year, $role) = $text =~ m!\((\d+)\)\s.+\.+\s(.+)!; 354 push @{$films->{$key}}, { title => $title, 355 code => $code, 356 year => $year, 357 role => $role, 358 }; 359 } 360 } 361 362 $self->{'_filmography'} = $films; 363 364 } else { 365 $self->_show_message("filmography defined!", 'DEBUG'); 366 } 367 368 return $self->{'_filmography'}; 369} 370 371=item genres() 372 373Retrieve a list of movie genres for specified person: 374 375 my $genres = $persons->genres; 376 377=cut 378 379sub genres { 380 my CLASS_NAME $self = shift; 381 382 unless($self->{_genres}) { 383 my @genres = $self->_get_common_array_propery('genres'); 384 $self->{_genres} = \@genres; 385 } 386 387 $self->{_genres}; 388} 389 390=item plot_keywords() 391 392Retrieve a list of keywords for movies where specified person plays: 393 394 my $keywords = $persons->plot_keywords; 395 396=cut 397 398sub plot_keywords { 399 my CLASS_NAME $self = shift; 400 401 unless($self->{_plot_keywords}) { 402 my @keywords = $self->_get_common_array_propery('plot keywords'); 403 $self->{_plot_keywords} = \@keywords; 404 } 405 406 $self->{_plot_keywords}; 407} 408 409sub _get_common_array_propery { 410 my CLASS_NAME $self = shift; 411 my $target = shift || ''; 412 413 my $parser = $self->_parser(FORCED); 414 while(my $tag = $parser->get_tag(MAIN_TAG)) { 415 my $text = $parser->get_text(); 416 last if $text =~ /$target/i; 417 } 418 419 my @res = (); 420 while(my $tag = $parser->get_tag('a')) { 421 last if $tag->[1]->{class} && $tag->[1]->{class} =~ /tn15more/i; 422 push @res, $parser->get_text; 423 } 424 425 return @res; 426} 427 428sub filmography_types { 429 my CLASS_NAME $self = shift; 430} 431 432sub DESTROY { 433 my $self = shift; 434} 435 4361; 437 438__END__ 439 440=back 441 442=head1 EXPORTS 443 444No Matches.=head1 BUGS 445 446Please, send me any found bugs by email: stepanov.michael@gmail.com. 447 448=head1 SEE ALSO 449 450IMDB::Film 451IMDB::BaseClass 452WWW::Yahoo::Movies 453HTML::TokeParser 454 455=head1 AUTHOR 456 457Mikhail Stepanov AKA nite_man (stepanov.michael@gmail.com) 458 459=head1 COPYRIGHT 460 461Copyright (c) 2004 - 2007, Mikhail Stepanov. 462This module is free software. It may be used, redistributed and/or 463modified under the same terms as Perl itself. 464 465=cut 466