1package WWW::Mixi::Scraper::Plugin::ViewEvent;
2
3use strict;
4use warnings;
5use WWW::Mixi::Scraper::Plugin;
6use WWW::Mixi::Scraper::Utils qw( _uri _datetime );
7use utf8;
8
9validator {qw(
10  id       is_number
11  comm_id  is_number
12  page     is_number_or_all
13)};
14
15sub scrape {
16  my ($self, $html) = @_;
17
18  my %scraper;
19  $scraper{images} = scraper {
20    process 'a',
21      link => '@onClick';
22    process 'a>img',
23      thumb_link => '@src';
24    result qw( link thumb_link );
25  };
26
27  $scraper{infos} = scraper {
28    process 'dt',
29      name => 'TEXT';
30    process 'dd',
31      string => 'TEXT';
32    process 'dd>a',
33      link    => '@href',
34      subject => 'TEXT';
35    result qw( name string link subject );
36  };
37
38  $scraper{topic} = scraper {
39    process 'dl.bbsList01>dt>span.date',
40      'time' => 'TEXT';
41    process 'dl.bbsList01>dt[class="bbsTitle clearfix"]>span.titleSpan',
42      'subject' => 'TEXT';
43    process 'dd.bbsContent>dl>dt>a',
44      'name'      => 'TEXT',
45      'name_link' => '@href';
46    process 'dd.bbsContent>dl>dt',
47      'name_string' => 'TEXT',
48    process 'dd.bbsContent>dl>dd',
49      'description' => $self->html_or_text;
50    process 'div.communityPhoto>table>tr>td',
51      'images[]' => $scraper{images};
52    process 'dl.bbsList01>dd.bbsInfo>dl',
53      'infos[]' => $scraper{infos};
54    result qw( time subject name_string name name_link images infos description );
55  };
56
57  $scraper{comment_body} = scraper {
58    process 'dl.commentContent01>dt>a',
59      'name_link' => '@href',
60      'name'      => 'TEXT';
61    process 'dl.commentContent01>dt',
62      'name_string' => 'TEXT';
63    process 'dl.commentContent01>dd',
64      'description' => $self->html_or_text;
65    process 'dl.commentContent01>dd>table>tr>td',
66      'images[]' => $scraper{images};
67    result qw( name_link name description images );
68  };
69
70  $scraper{comment} = scraper {
71    process 'dl.commentList01>dt>span.date',
72      'dates[]' => 'TEXT';
73    process 'dl.commentList01>dt>span.senderId',
74      'sender_ids[]' => 'TEXT';
75    process 'dl.commentList01>dd',
76      'comments[]' => $scraper{comment_body};
77    result qw( dates comments sender_ids );
78  };
79
80  my $stash = $self->post_process($scraper{topic}->scrape(\$html))->[0];
81
82  if ($stash->{name_string} && !$stash->{name}) {
83    $stash->{name} = $stash->{name_string};
84  }
85
86  foreach my $item (@{ $stash->{infos} || [] }) {
87    if ( $item->{name} eq '開催日時' ) {
88      $stash->{date} = $item->{string};
89    }
90    if ( $item->{name} eq '募集期限' ) {
91      $stash->{deadline} = $item->{string};
92    }
93    if ( $item->{name} eq '開催場所' ) {
94      $stash->{location} = $item->{string};
95    }
96    if ( $item->{name} eq '参加者' ) {
97      $stash->{list}->{count}   = $item->{string};
98      $stash->{list}->{link}    = _uri( $item->{link} );
99      $stash->{list}->{subject} = $item->{subject};
100    }
101  }
102
103  # XXX: this fails when you test with local files.
104  # However, this link cannot be extracted from the html,
105  # at least as of writing this. ugh.
106  $stash->{link} = $self->{uri};
107
108  my $stash_c = $self->post_process($scraper{comment}->scrape(\$html))->[0];
109
110  my @dates      = @{ $stash_c->{dates} || [] };
111  my @sender_ids = @{ $stash_c->{sender_ids} || [] };
112  my @comments   = @{ $stash_c->{comments} || [] };
113  foreach my $comment ( @comments ) {
114    $comment->{time}      = _datetime( shift @dates );
115    $comment->{subject}   = shift @sender_ids;
116
117    if (!$comment->{name}) {
118      $comment->{name} = $comment->{name_string} || ' ';
119    }
120
121    # incompatible with WWW::Mixi to let comment links
122    # look more 'permanent' to make plagger/rss readers happier
123    $comment->{name_link} = _uri( $comment->{name_link} );
124    $comment->{link}      = $stash->{link}
125      ? _uri( $stash->{link} . '#' . $comment->{subject} )
126      : undef;
127
128    if ( $comment->{images} ) {
129      foreach my $image ( @{ $comment->{images} || [] } ) {
130        $image->{link}       = _uri( $image->{link} );
131        $image->{thumb_link} = _uri( $image->{thumb_link} );
132      }
133    }
134  }
135
136  $stash->{comments} = \@comments;
137
138  return $stash;
139}
140
1411;
142
143__END__
144
145=head1 NAME
146
147WWW::Mixi::Scraper::Plugin::ViewEvent
148
149=head1 DESCRIPTION
150
151This is almost equivalent to WWW::Mixi->parse_view_event().
152
153=head1 METHOD
154
155=head2 scrape
156
157returns a hash reference such as
158
159  {
160    subject => 'title of the event',
161    link => 'http://mixi.jp/view_event.pl?id=xxx',
162    time => 'yyyy-mm-dd hh:mm',
163    date => 'yyyy-mm-dd',
164    deadline => 'sometime soon',
165    location => 'somewhere',
166    description => 'event description',
167    name => 'who plans',
168    name_link => 'http://mixi.jp/show_friend.pl?id=xxx',
169    list => {
170      count => '8人',
171      link => 'http://mixi.jp/list_event_member.pl?id=xxx&comm_id=xxx',
172      subject => '参加者一覧を見る',
173    },
174    comments => [
175      {
176        subject     => 1,
177        name        => 'commenter',
178        name_link   => 'http://mixi.jp/show_friend.pl?id=xxxx',
179        link        => 'http://mixi.jp/view_event.pl?id=xxxx#1',
180        time        => 'yyyy-mm-dd hh:mm',
181        description => 'comment body',
182      }
183    ]
184  }
185
186=head1 AUTHOR
187
188Kenichi Ishigaki, E<lt>ishigaki at cpan.orgE<gt>
189
190=head1 COPYRIGHT AND LICENSE
191
192Copyright (C) 2007 by Kenichi Ishigaki.
193
194This program is free software; you can redistribute it and/or
195modify it under the same terms as Perl itself.
196
197=cut
198