1=head1 NAME
2
3IMDB::Persons - Perl extension for retrieving movies persons
4from IMDB.com
5
6=head1 SYNOPSIS
7
8  	use IMDB::Persons;
9
10	#
11	# Retrieve a person information by IMDB code
12	#
13	my $person = new IMDB::Persons(crit => '0000129');
14
15	or
16
17	#
18	# Retrieve a person information by name
19	#
20  	my $person = new IMDB::Persons(crit => 'Tom Cruise');
21
22	or
23
24	#
25	# Process already stored HTML page from IMDB
26	#
27	my $person = new IMDB::Persons(file => 'imdb.html');
28
29	if($person->status) {
30		print "Name: ".$person->name."\n";
31		print "Birth Date: ".$person->date_of_birth."\n";
32	} else {
33		print "Something wrong: ".$person->error."!\n";
34	}
35
36=head1 DESCRIPTION
37
38IMDB::Persons allows to retrieve an information about
39IMDB persons (actors, actresses, directors etc): full name,
40photo, date and place of birth, mini bio and filmography.
41
42=cut
43
44package IMDB::Persons;
45
46use strict;
47use warnings;
48
49use Carp;
50
51use Data::Dumper;
52
53use base qw(IMDB::BaseClass);
54
55use fields qw(	_name
56				_date_of_birth
57				_place_of_birth
58				_photo
59				_mini_bio
60				_filmography_types
61				_filmography
62				_genres
63				_plot_keywords
64	);
65
66use vars qw($VERSION %FIELDS);
67
68use constant FORCED 	=> 1;
69use constant CLASS_NAME => 'IMDB::Persons';
70use constant MAIN_TAG	=> 'h4';
71
72BEGIN {
73	$VERSION = '0.53';
74}
75
76{
77	my %_defaults = (
78		cache			=> 0,
79		debug			=> 0,
80		error			=> [],
81		matched			=> [],
82		cache_exp		=> '1 h',
83        host			=> 'www.imdb.com',
84        query			=> 'name/nm',
85        search 			=> 'find?nm=on&mx=20&q=',
86		status			=> 0,
87		timeout			=> 10,
88		user_agent		=> 'Mozilla/5.0',
89	);
90
91	sub _get_default_attrs { keys %_defaults }
92	sub _get_default_value {
93		my($self, $attr) = @_;
94		$_defaults{$attr};
95	}
96}
97
98=head1 Object Private Methods
99
100=over 4
101
102=item _init()
103
104Initialize a new object.
105
106=cut
107
108sub _init {
109	my CLASS_NAME $self = shift;
110	my %args = @_;
111
112	croak "Person IMDB ID or Name should be defined!" if !$args{crit} && !$args{file};
113
114	$self->SUPER::_init(%args);
115	my $name = $self->name();
116
117	for my $prop (grep { /^_/ && !/^_name$/ } sort keys %FIELDS) {
118		($prop) = $prop =~ /^_(.*)/;
119		$self->$prop();
120	}
121}
122
123=item _search_person()
124
125Implements a logic to search IMDB persons by their names.
126
127=cut
128
129sub _search_person {
130	my CLASS_NAME $self = shift;
131
132	return $self->SUPER::_search_results('\/name\/nm(\d+)', '/a');
133}
134
135sub fields {
136	my CLASS_NAME $self = shift;
137	return \%FIELDS;
138}
139
140
141=back
142
143=head1 Object Public Methods
144
145=over 4
146
147=item name()
148
149Retrieve a person full name
150
151	my $person_name = $person->name();
152
153=cut
154
155sub name {
156	my CLASS_NAME $self = shift;
157	if(!defined $self->{'_name'}) {
158		my $parser = $self->_parser(FORCED);
159
160		$parser->get_tag('title');
161		my $title = $parser->get_text();
162		$title =~ s#\s*\-\s*IMDB##i;
163
164		$self->_show_message("Title=$title", 'DEBUG');
165
166		# Check if we have some search results
167		my $no_matches = 1;
168		while(my $tag = $parser->get_tag('td')) {
169			if($tag->[1]->{class} && $tag->[1]->{class} eq 'media_strip_header') {
170				$no_matches = 0;
171				last;
172			}
173		}
174
175		if($title =~ /imdb\s+name\s+search/i && !$no_matches) {
176			$self->_show_message("Go to search page ...", 'DEBUG');
177			$title = $self->_search_person();
178		}
179
180		$title = '' if $title =~ /IMDb Name Search/i;
181		if($title) {
182			$self->status(1);
183			$self->retrieve_code($parser, 'http://www.imdb.com/name/nm(\d+)') unless $self->code;
184		} else {
185			$self->status(0);
186			$self->error('Not Found');
187		}
188
189		$title =~ s/^imdb\s+\-\s+//i;
190		$self->{'_name'} = $title;
191	}
192
193	return $self->{'_name'};
194}
195
196=item mini_bio()
197
198Returns a mini bio for specified IMDB person
199
200	my $mini_bio = $person->mini_bio();
201
202=cut
203
204sub mini_bio {
205	my CLASS_NAME $self = shift;
206	if(!defined $self->{_mini_bio}) {
207		my $parser = $self->_parser(FORCED);
208		while(my $tag = $parser->get_tag('div') ) {
209			last if $tag->[1]->{class} && $tag->[1]->{class} eq 'infobar';
210		}
211
212		my $tag = $parser->get_tag('p');
213		$self->{'_mini_bio'} = $parser->get_trimmed_text('a');
214	}
215	return $self->{'_mini_bio'};
216}
217
218=item date_of_birth()
219
220Returns a date of birth of IMDB person in format 'day' 'month caption' 'year':
221
222	my $d_birth = $person->date_of_birth();
223
224=cut
225
226#TODO: add date convertion in different formats.
227sub date_of_birth {
228	my CLASS_NAME $self = shift;
229	if(!defined $self->{'_date_of_birth'}) {
230		my $parser = $self->_parser(FORCED);
231		while(my $tag = $parser->get_tag(MAIN_TAG)) {
232			my $text = $parser->get_text;
233			last if $text =~ /^Born/i;
234		}
235
236		my $date = '';
237		my $year = '';
238		my $place = '';
239		while(my $tag = $parser->get_tag()) {
240			last if $tag->[0] eq '/td';
241
242			if($tag->[0] eq 'a') {
243				my $text = $parser->get_text();
244				next unless $text;
245
246				SWITCH: for($tag->[1]->{href}) {
247					/birth_monthday/i && do { $date = $text; $date =~ s#(\w+)\s(\d+)#$2 $1#; last SWITCH; };
248					/birth_year/i && do { $year = $text; last SWITCH; };
249					/birth_place/i && do { $place = $text; last SWITCH; };
250				}
251			}
252		}
253
254		$self->{'_date_of_birth'} = {date => "$date $year", place => $place};
255	}
256
257	return $self->{'_date_of_birth'}{'date'};
258}
259
260=item place_of_birth()
261
262Returns a name of place of the birth
263
264	my $place = $person->place_of_birth();
265
266=cut
267
268sub place_of_birth {
269	my CLASS_NAME $self = shift;
270	return $self->{'_date_of_birth'}{'place'};
271}
272
273=item photo()
274
275Return a path to the person's photo
276
277	my $photo = $person->photo();
278
279=cut
280
281sub photo {
282	my CLASS_NAME $self = shift;
283	if(!defined $self->{'_photo'}) {
284		my $tag;
285		my $parser = $self->_parser(FORCED);
286		while($tag = $parser->get_tag('img')) {
287			if($tag->[1]->{alt} && $tag->[1]->{alt} eq $self->name . ' Picture') {
288				$self->{'_photo'} = $tag->[1]{src};
289				last;
290			}
291		}
292
293		$self->{'_photo'} = 'No Photo' unless $self->{'_photo'};
294	}
295
296	return $self->{'_photo'};
297}
298
299=item filmography()
300
301Returns a person's filmography as a hash of arrays with following structure:
302
303	my $fg = $person->filmography();
304
305	__DATA__
306	$fg = {
307		'Section' => [
308			{ 	title 	=> 'movie title',
309				role 	=> 'person role',
310				year 	=> 'year of movie production',
311				code	=> 'IMDB code of movie',
312			}
313		];
314	}
315
316The section can be In Development, Actor, Self, Thanks, Archive Footage, Producer etc.
317
318=cut
319
320sub filmography {
321	my CLASS_NAME $self = shift;
322
323	my $films;
324	if(!$self->{'_filmography'}) {
325		my $parser = $self->_parser(FORCED);
326		while(my $tag = $parser->get_tag('h2')) {
327
328			my $text = $parser->get_text;
329			last if $text && $text =~ /filmography/i;
330		}
331
332		my $key = 'Unknown';
333		while(my $tag = $parser->get_tag()) {
334
335			last if $tag->[0] eq 'script'; # Netx section after filmography
336
337			if($tag->[0] eq 'h5') {
338				my $caption = $parser->get_trimmed_text('h5', '/a');
339
340				$key = $caption if $caption;
341				$key =~ s/://;
342
343				$self->_show_message("FILMOGRAPHY: key=$key; caption=$caption; trimmed=".$parser->get_trimmed_text('h5', '/a'), 'DEBUG');
344			}
345
346			if($tag->[0] eq 'a' && $tag->[1]->{href} && $tag->[1]{href} =~ m!title\/tt(\d+)!) {
347				my $title = $parser->get_text();
348				my $text = $parser->get_trimmed_text('br', '/li');
349
350				$self->_show_message("link: $title --> $text", 'DEBUG');
351
352				my $code = $1;
353				my($year, $role) = $text =~ m!\((\d+)\)\s.+\.+\s(.+)!;
354				push @{$films->{$key}}, {	title 	=> $title,
355											code 	=> $code,
356											year	=> $year,
357											role	=> $role,
358										};
359			}
360		}
361
362		$self->{'_filmography'} = $films;
363
364	} else {
365		$self->_show_message("filmography defined!", 'DEBUG');
366	}
367
368	return $self->{'_filmography'};
369}
370
371=item genres()
372
373Retrieve a list of movie genres for specified person:
374
375	my $genres = $persons->genres;
376
377=cut
378
379sub genres {
380	my CLASS_NAME $self = shift;
381
382	unless($self->{_genres}) {
383		my @genres = $self->_get_common_array_propery('genres');
384		$self->{_genres} = \@genres;
385	}
386
387	$self->{_genres};
388}
389
390=item plot_keywords()
391
392Retrieve a list of keywords for movies where specified person plays:
393
394	my $keywords = $persons->plot_keywords;
395
396=cut
397
398sub plot_keywords {
399	my CLASS_NAME $self = shift;
400
401	unless($self->{_plot_keywords}) {
402		my @keywords = $self->_get_common_array_propery('plot keywords');
403		$self->{_plot_keywords} = \@keywords;
404	}
405
406	$self->{_plot_keywords};
407}
408
409sub _get_common_array_propery {
410	my CLASS_NAME $self = shift;
411	my $target = shift || '';
412
413	my $parser = $self->_parser(FORCED);
414	while(my $tag = $parser->get_tag(MAIN_TAG)) {
415		my $text = $parser->get_text();
416		last if $text =~ /$target/i;
417	}
418
419	my @res = ();
420	while(my $tag = $parser->get_tag('a')) {
421		last if $tag->[1]->{class} && $tag->[1]->{class} =~ /tn15more/i;
422		push @res, $parser->get_text;
423	}
424
425	return @res;
426}
427
428sub filmography_types {
429	my CLASS_NAME $self = shift;
430}
431
432sub DESTROY {
433	my $self = shift;
434}
435
4361;
437
438__END__
439
440=back
441
442=head1 EXPORTS
443
444No Matches.=head1 BUGS
445
446Please, send me any found bugs by email: stepanov.michael@gmail.com.
447
448=head1 SEE ALSO
449
450IMDB::Film
451IMDB::BaseClass
452WWW::Yahoo::Movies
453HTML::TokeParser
454
455=head1 AUTHOR
456
457Mikhail Stepanov AKA nite_man (stepanov.michael@gmail.com)
458
459=head1 COPYRIGHT
460
461Copyright (c) 2004 - 2007, Mikhail Stepanov.
462This module is free software. It may be used, redistributed and/or
463modified under the same terms as Perl itself.
464
465=cut
466