1package WWW::Mixi::Scraper::Plugin::ViewEvent; 2 3use strict; 4use warnings; 5use WWW::Mixi::Scraper::Plugin; 6use WWW::Mixi::Scraper::Utils qw( _uri _datetime ); 7use utf8; 8 9validator {qw( 10 id is_number 11 comm_id is_number 12 page is_number_or_all 13)}; 14 15sub scrape { 16 my ($self, $html) = @_; 17 18 my %scraper; 19 $scraper{images} = scraper { 20 process 'a', 21 link => '@onClick'; 22 process 'a>img', 23 thumb_link => '@src'; 24 result qw( link thumb_link ); 25 }; 26 27 $scraper{infos} = scraper { 28 process 'dt', 29 name => 'TEXT'; 30 process 'dd', 31 string => 'TEXT'; 32 process 'dd>a', 33 link => '@href', 34 subject => 'TEXT'; 35 result qw( name string link subject ); 36 }; 37 38 $scraper{topic} = scraper { 39 process 'dl.bbsList01>dt>span.date', 40 'time' => 'TEXT'; 41 process 'dl.bbsList01>dt[class="bbsTitle clearfix"]>span.titleSpan', 42 'subject' => 'TEXT'; 43 process 'dd.bbsContent>dl>dt>a', 44 'name' => 'TEXT', 45 'name_link' => '@href'; 46 process 'dd.bbsContent>dl>dt', 47 'name_string' => 'TEXT', 48 process 'dd.bbsContent>dl>dd', 49 'description' => $self->html_or_text; 50 process 'div.communityPhoto>table>tr>td', 51 'images[]' => $scraper{images}; 52 process 'dl.bbsList01>dd.bbsInfo>dl', 53 'infos[]' => $scraper{infos}; 54 result qw( time subject name_string name name_link images infos description ); 55 }; 56 57 $scraper{comment_body} = scraper { 58 process 'dl.commentContent01>dt>a', 59 'name_link' => '@href', 60 'name' => 'TEXT'; 61 process 'dl.commentContent01>dt', 62 'name_string' => 'TEXT'; 63 process 'dl.commentContent01>dd', 64 'description' => $self->html_or_text; 65 process 'dl.commentContent01>dd>table>tr>td', 66 'images[]' => $scraper{images}; 67 result qw( name_link name description images ); 68 }; 69 70 $scraper{comment} = scraper { 71 process 'dl.commentList01>dt>span.date', 72 'dates[]' => 'TEXT'; 73 process 'dl.commentList01>dt>span.senderId', 74 'sender_ids[]' => 'TEXT'; 75 process 'dl.commentList01>dd', 76 'comments[]' => $scraper{comment_body}; 77 result qw( dates comments sender_ids ); 78 }; 79 80 my $stash = $self->post_process($scraper{topic}->scrape(\$html))->[0]; 81 82 if ($stash->{name_string} && !$stash->{name}) { 83 $stash->{name} = $stash->{name_string}; 84 } 85 86 foreach my $item (@{ $stash->{infos} || [] }) { 87 if ( $item->{name} eq '開催日時' ) { 88 $stash->{date} = $item->{string}; 89 } 90 if ( $item->{name} eq '募集期限' ) { 91 $stash->{deadline} = $item->{string}; 92 } 93 if ( $item->{name} eq '開催場所' ) { 94 $stash->{location} = $item->{string}; 95 } 96 if ( $item->{name} eq '参加者' ) { 97 $stash->{list}->{count} = $item->{string}; 98 $stash->{list}->{link} = _uri( $item->{link} ); 99 $stash->{list}->{subject} = $item->{subject}; 100 } 101 } 102 103 # XXX: this fails when you test with local files. 104 # However, this link cannot be extracted from the html, 105 # at least as of writing this. ugh. 106 $stash->{link} = $self->{uri}; 107 108 my $stash_c = $self->post_process($scraper{comment}->scrape(\$html))->[0]; 109 110 my @dates = @{ $stash_c->{dates} || [] }; 111 my @sender_ids = @{ $stash_c->{sender_ids} || [] }; 112 my @comments = @{ $stash_c->{comments} || [] }; 113 foreach my $comment ( @comments ) { 114 $comment->{time} = _datetime( shift @dates ); 115 $comment->{subject} = shift @sender_ids; 116 117 if (!$comment->{name}) { 118 $comment->{name} = $comment->{name_string} || ' '; 119 } 120 121 # incompatible with WWW::Mixi to let comment links 122 # look more 'permanent' to make plagger/rss readers happier 123 $comment->{name_link} = _uri( $comment->{name_link} ); 124 $comment->{link} = $stash->{link} 125 ? _uri( $stash->{link} . '#' . $comment->{subject} ) 126 : undef; 127 128 if ( $comment->{images} ) { 129 foreach my $image ( @{ $comment->{images} || [] } ) { 130 $image->{link} = _uri( $image->{link} ); 131 $image->{thumb_link} = _uri( $image->{thumb_link} ); 132 } 133 } 134 } 135 136 $stash->{comments} = \@comments; 137 138 return $stash; 139} 140 1411; 142 143__END__ 144 145=head1 NAME 146 147WWW::Mixi::Scraper::Plugin::ViewEvent 148 149=head1 DESCRIPTION 150 151This is almost equivalent to WWW::Mixi->parse_view_event(). 152 153=head1 METHOD 154 155=head2 scrape 156 157returns a hash reference such as 158 159 { 160 subject => 'title of the event', 161 link => 'http://mixi.jp/view_event.pl?id=xxx', 162 time => 'yyyy-mm-dd hh:mm', 163 date => 'yyyy-mm-dd', 164 deadline => 'sometime soon', 165 location => 'somewhere', 166 description => 'event description', 167 name => 'who plans', 168 name_link => 'http://mixi.jp/show_friend.pl?id=xxx', 169 list => { 170 count => '8人', 171 link => 'http://mixi.jp/list_event_member.pl?id=xxx&comm_id=xxx', 172 subject => '参加者一覧を見る', 173 }, 174 comments => [ 175 { 176 subject => 1, 177 name => 'commenter', 178 name_link => 'http://mixi.jp/show_friend.pl?id=xxxx', 179 link => 'http://mixi.jp/view_event.pl?id=xxxx#1', 180 time => 'yyyy-mm-dd hh:mm', 181 description => 'comment body', 182 } 183 ] 184 } 185 186=head1 AUTHOR 187 188Kenichi Ishigaki, E<lt>ishigaki at cpan.orgE<gt> 189 190=head1 COPYRIGHT AND LICENSE 191 192Copyright (C) 2007 by Kenichi Ishigaki. 193 194This program is free software; you can redistribute it and/or 195modify it under the same terms as Perl itself. 196 197=cut 198