## ----------------------------------------------------------------------------
#  Auto::FetchTitle::Plugin::Mixi.
# -----------------------------------------------------------------------------
# Mastering programmed by YAMASHINA Hio
#
# Copyright 2008 YAMASHINA Hio
# -----------------------------------------------------------------------------
# $Id: Mixi.pm 31645 2009-03-28 14:40:50Z hio $
# -----------------------------------------------------------------------------
package Auto::FetchTitle::Plugin::Mixi;
use strict;
use warnings;
use base 'Auto::FetchTitle::Plugin';

# Fetches page titles from mixi.
# Because some diaries and communities are private, only the pages that
# are explicitly permitted in the configuration are fetched.
#
# Pages that can currently be fetched:
# - news
# - communities
#
# Unsupported pages:
# - friend (my-mixi) pages
# - diaries
# - albums, videos, music, etc.
#
# NOTE(review): detect_page() below does list friend/diary/album/video
# patterns, so the "unsupported" list above may be out of date.

# $DEBUG is an alias of $Auto::FetchTitle::DEBUG, so toggling the parent
# flag toggles debug output here as well.
our $DEBUG;
*DEBUG = \$Auto::FetchTitle::DEBUG;

# Default messages used when a requested page is not in the allow list.
our $MSG_NOPERM_EN = "requested page in mixi is not permitted";
our $MSG_NOPERM_JA = "mixi内の指定されたページは許可リストにありませんでした";
our $MSG_NOPERM    = $MSG_NOPERM_JA;

# True value for require/use; the subs below are compiled regardless of
# where this statement sits.
1;

# -----------------------------------------------------------------------------
# $pkg->new(\%config).
# Constructs the plugin via the parent constructor and attaches an empty
# cookie jar that is shared by all requests handled by this instance.
#
sub new
{
  my $pkg  = shift;
  my $this = $pkg->SUPER::new(@_);
  $this->{cookie_jar} = [];
  $this;
}

# -----------------------------------------------------------------------------
# $obj->register($context).
# Registers this plugin under the name 'mixi' with its prereq and response
# filters.
#
sub register
{
  my $this    = shift;
  my $context = shift;

  $context->register_hook($this, {
    name              => 'mixi',
    'filter.prereq'   => \&filter_prereq,
    'filter.response' => \&filter_response,
  });
}

# -----------------------------------------------------------------------------
# $this->not_permitted($ctx, $req, $block).
# Reports that the requested URL is not permitted.
#
# The message comes from $block->mixi_noperm_msg, falling back to
# $MSG_NOPERM. A leading "word:" prefix is stripped and used as the message
# type; without a prefix the type is 'noerror'. The message is then passed
# through Tools::HashTools::replace_recursive together with the request URL.
#
# - Before the request has a response (prereq stage), $req->{response} is
#   set either to a synthetic HTML response ('noerror') or to the plain
#   message string (any other type).
# - Otherwise the message replaces $req->{result}{result}; for types other
#   than 'noerror' it is prefixed with an error marker.
#
sub not_permitted
{
  my $this  = shift;
  my $ctx   = shift;
  my $req   = shift;
  my $block = shift;

  my $txt  = $block->mixi_noperm_msg || $MSG_NOPERM;
  my $type = $txt =~ s/^(\w+):\s*// ? $1 : 'noerror';

  $txt = Tools::HashTools::replace_recursive(
    $txt, [{url => $req->{url}}]
  );

  if( !$req->{response} )
  {
    # prereq.
    if( $type eq 'noerror' )
    {
      $req->{response} = $this->response_noerror($txt);
    }else
    {
      # scalar response means error message.
      $req->{response} = $txt;
    }
  }else
  {
    if( $type ne 'noerror' )
    {
      $txt = "(エラー) $txt";
    }
    $req->{result}{result} = $txt;
  }
}

# -----------------------------------------------------------------------------
# $res = $this->response_noerror($msg).
# Builds a synthetic, already-finished HTTP 200 response hash whose body is
# an HTML <title> element containing the (HTML-escaped) message.
#
sub response_noerror
{
  my $this = shift;
  my $txt  = shift;

  my $txt_esc = $this->_escapeHTML($txt);
  my $content = "<title>$txt_esc</title>\n";

  # NOTE(review): length() counts characters when $content is a decoded
  # string; presumably messages are byte strings here -- confirm.
  my $res = {
    Protocol => 'HTTP/1.0',
    Code     => 200,
    Message  => 'Success',
    Header   => {
      'Content-Type'   => 'text/html; charset=utf-8',
      'Content-Length' => length($content),
    },
    Content     => $content,
    StreamState => 'finished',
  };
  $res;
}

# -----------------------------------------------------------------------------
# $escaped = $this->_escapeHTML($txt).
# Replaces the HTML special characters & < > " ' with character references.
# '&' must be handled first so that the references produced by the later
# substitutions are not escaped a second time.
#
sub _escapeHTML
{
  my $this = shift;
  my $txt  = shift;
  $txt =~ s/&/&amp;/g;
  $txt =~ s/</&lt;/g;
  $txt =~ s/>/&gt;/g;
  $txt =~ s/"/&quot;/g;
  $txt =~ s/'/&#39;/g;
  $txt;
}

# -----------------------------------------------------------------------------
# $page = $this->detect_page($ctx, $req, $block).
# Checks whether $req->{url} is a page that may be fetched.
#
# The URL is matched against a fixed list of page patterns. Each pattern may
# name its captures through 'keys'; a key that starts with a word character
# (e.g. 'community', 'friend') must be permitted by the values returned
# from $block->mixi_<key>('all'). Each value is a comma/whitespace separated
# list of numeric ids, '*' permits every id, and text after '#' is ignored.
# Empty keys and keys starting with '-' are not checked.
#
# Returns the matched page hash (with the captures stored in {values}), or
# nothing when no pattern matches or the first matching pattern is not
# permitted by the configuration.
#
sub detect_page
{
  my $this  = shift;
  my $ctx   = shift;
  my $req   = shift;
  my $block = shift;

  $DEBUG and $ctx->_debug($req, __PACKAGE__."#detect_page, $req->{url}.");
  my @allow_pages = (
    {
      name     => 'login',
      can_show => 0,
      re       => qr{^\Qhttp://mixi.jp/login.pl\E\z},
    },
    {
      name     => 'news-login',
      can_show => 0,
      re       => qr{^\Qhttp://mixi.jp/issue_ticket.pl?},
    },
    {
      name     => 'login-check',
      can_show => 0,
      re       => qr{^\Qhttp://mixi.jp/check.pl?},
    },
    {
      name     => 'news',
      can_show => 1,
      re       => qr{^\Qhttp://news.mixi.jp/view_news.pl?},
    },
    {
      name     => 'news-list-media',
      can_show => 1,
      re       => qr{^\Qhttp://news.mixi.jp/list_news_media.pl?},
    },
    {
      name     => 'news-list-category',
      can_show => 1,
      re       => qr{^\Qhttp://news.mixi.jp/list_news_category.pl?},
    },
    {
      name     => 'community-top',
      can_show => 1,
      re       => qr{^\Qhttp://mixi.jp/view_community.pl?id=\E(\d+)\z},
      keys     => ['community'],
    },
    {
      name     => 'community-bbs-list',
      can_show => 1,
      re       => qr{^http://mixi\.jp/list_bbs\.pl\?id=(\d+)&type=(?:bbs|event|enquete)\z},
      keys     => ['community'],
    },
    {
      name     => 'community-bbs-show',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_(bbs|event|enquete)\.pl\?(?:page=\d+&)?id=\d+&(?:comment_count=\d+&)?comm_id=(\d+)(?:&page=all)?\z},
      keys     => ['','community'],
    },
    {
      name     => 'friend',
      can_show => 1,
      re       => qr{^http://mixi\.jp/show_friend\.pl\?id=(\d+)\z},
      keys     => ['friend'],
    },

    # album.
    {
      name     => 'friend-album-list',
      can_show => 1,
      re       => qr{^http://mixi\.jp/list_album\.pl\?id=(\d+)(?:&from=navi)?\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-album-photolist',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_album\.pl\?id=(\d+)&owner_id=(\d+)&mode=(?:photo|comment)\z},
      keys     => ['-albumid', 'friend'],
    },
    {
      name     => 'friend-album-photo',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_album_photo\.pl\?album_id=(\d+)&owner_id=(\d+)&number=(\d+)(?:&page=(\d+))?\z},
      keys     => ['-albumid', 'friend', '-photoid', '-page'],
    },

    # obsolete?
    {
      name     => 'friend-list-diary/album/review/comment',
      can_show => 1,
      re       => qr{^http://mixi\.jp/list_(?:diary|album|review|comment)\.pl\?(?:page=\d+&)?id=(\d+)(?:&year=\d+&month=\d+(?:&day=\d+)?)?\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-list-video/music',
      can_show => 1,
      re       => qr{^http://(video|music)\.mixi\.jp/list_\1\.pl\?id=(\d+)\z},
      keys     => ['', 'friend'],
    },
    {
      name     => 'friend-diary',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_diary\.pl\?id=\d+&owner_id=(\d+)\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-video',
      can_show => 1,
      re       => qr{^http://video\.mixi\.jp/view_video\.pl\?owner_id=(\d+)&video_id=\d+\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-review',
      can_show => 1,
      # The '?' that starts the query string must be escaped; unescaped it
      # would make the preceding 'l' optional and never match a real URL.
      re       => qr{^http://mixi\.jp/view_item\.pl\?reviewer_id=(\d+)&id=\d+\z},
      keys     => ['friend'],
    },
  );

  foreach my $page (@allow_pages)
  {
    $DEBUG and $ctx->_debug($req, "- check $page->{name}.");
    # In list context a successful match without capture groups yields (1),
    # which lines up with the default single empty key below; a failed
    # match yields an empty list and therefore never equals the key count.
    my $values = [$req->{url} =~ $page->{re}];
    my $keys   = $page->{keys} || [''];
    if( @$values != @$keys)
    {
      $DEBUG and $ctx->_debug($req, "- - not match.");
      next;
    }

    foreach my $idx (0..$#$keys)
    {
      my $key = $keys->[$idx];
      $key or next;            # unnamed capture: nothing to check.
      $key =~ /^\w/ or next;   # '-name' captures are informational only.
      my $val      = $values->[$idx];
      my $conf_key = "mixi_$key";
      my $allowed;
      foreach my $_conf_val ($block->$conf_key('all'))
      {
        my $conf_val = $_conf_val; # copy (unalias).
        $conf_val =~ s/\s*#.*//s;  # strip trailing comment.
        $conf_val or next;
        foreach my $item (split(/[,\s]+/, $conf_val))
        {
          $item or next;
          $item eq '*' and $allowed=1, last;
          $item =~ /^0*(\d+)\z/ or next;
          $1 == $val and $allowed=1, last;
        }
        $allowed and last;
      }
      if( !$allowed )
      {
        # The URL shape matched but the id is not permitted: deny at once
        # rather than trying the remaining patterns.
        $DEBUG and $ctx->_debug($req, "- - not match / mixi-$key = $val");
        return;
      }
    }
    $DEBUG and $ctx->_debug($req, "- - match.");
    $page->{values} = $values;
    return $page;
  }
  return;
}

# -----------------------------------------------------------------------------
# $this->filter_prereq($ctx, $arg).
# (impl:fetchtitle-filter)
# mixi/prereq.
#
# Runs before the request is sent. Counts how many login pages appear in the
# chain of previous requests ($req->{old}), rejects URLs that detect_page()
# does not permit, and otherwise applies a receive limit and attaches the
# cookies collected so far.
#
sub filter_prereq
{
  my $this = shift;
  my $ctx  = shift;
  my $arg  = shift;

  my $req   = $arg->{req};
  my $block = $arg->{block};

  $DEBUG and $ctx->_debug($req, "- mixi.check multiple login pages.");
  my $seen = { login => 0, news_login => 0 };
  my $prev = $req;
  while( $prev )
  {
    if( $prev->{url} eq 'http://mixi.jp/login.pl' )
    {
      ++$seen->{login};
      $DEBUG and $ctx->_debug($req, "- login-page: $prev->{url}");
    }elsif( $prev->{url} =~ m{^\Qhttp://mixi.jp/issue_ticket.pl?\E} )
    {
      ++$seen->{news_login};
      $DEBUG and $ctx->_debug($req, "- news-login-page: $prev->{url}");
    }else
    {
      $DEBUG and $ctx->_debug($req, "- normal-page: $prev->{url}");
    }
    $prev = $prev->{old};
  }
  if( $seen->{login} >= 2 || $seen->{news_login} >= 3 )
  {
    # Aborting on repeated login pages was disabled in the original code;
    # the condition is only reported when debugging.
    my $msg = "login pages (login=$seen->{login},news_login=$seen->{news_login})";
    $DEBUG and $ctx->_debug($req, "- $msg");
    #$req->{response} = "mixi multiple login pages (login=$seen->{login},news_login=$seen->{news_login})";
    #return;
  }

  my $allowed = $this->detect_page($ctx, $req, $block);
  if( !$allowed )
  {
    $this->not_permitted($ctx, $req, $block);
    return;
  }

  $ctx->_apply_recv_limit($req, 1000*1024);

  $ctx->_add_cookie_header($req, $this->{cookie_jar});
}

# -----------------------------------------------------------------------------
# $this->filter_response($ctx, $arg).
# (impl:fetchtitle-filter)
# mixi/response.
#
# Runs after a response has been received:
# - merges cookies parsed from the response into the cookie jar;
# - does nothing more when $req->{response} is not a reference;
# - when the page contains mixi's error area, prepends its text to the
#   result;
# - when the page contains the login form, schedules a login POST through
#   _do_login();
# - otherwise re-checks the URL with detect_page() and reports pages that
#   are not permitted.
#
sub filter_response
{
  my $this = shift;
  my $ctx  = shift;
  my $arg  = shift;

  my $req   = $arg->{req};
  my $block = $arg->{block};

  if( $req->{parsed_cookies} )
  {
    $ctx->_merge_cookies($this->{cookie_jar}, $req->{parsed_cookies});
  }

  if( !ref($req->{response}) )
  {
    $DEBUG and $ctx->_debug($req, "debug: - - skip/not ref");
    return;
  }

  my $result = $req->{result};
  if( $result->{decoded_content} =~ m{<div id="errorArea">(.*?)</div>}s )
  {
    my $txt = $1;
    $txt = $ctx->_fixup_title($txt);
    $req->{result}{result} = $txt . ' - ' . $req->{result}{result};
    return;
  }

  if( $result->{decoded_content} =~ m{<form action="(/login.pl)" method="post" name="login_form">(.*)</form>}s )
  {
    my $path = $1;
    my $form = $2;
    $DEBUG and $ctx->_debug($req, __PACKAGE__."#_filter_mixi_response, login form found ($path)");
    $this->_do_login($ctx, $req, $block, $form, $path);
  }else
  {
    my $page = $this->detect_page($ctx, $req, $block);
    if( !$page )
    {
      $this->not_permitted($ctx, $req, $block);
      return;
    }
  }
}

# -----------------------------------------------------------------------------
# $this->_do_login($ctx, $req, $block, $form, $path).
# Builds the login POST from the <input> elements found in $form.
#
# The 'email' and 'password' fields are filled from the plugin configuration
# (mixi_user / mixi_pass, passed through $ctx->_decode_value); every other
# field keeps the value found in the form. When either credential is
# missing the login is abandoned. On success the request is redirected to
# the form action on the current host via $req->{result}{redirect}.
#
sub _do_login
{
  my $this  = shift;
  my $ctx   = shift;
  my $req   = shift;
  my $block = shift;
  my $form  = shift;
  my $path  = shift;

  my @post;
  my $redir_url = 'http://'.($req->{headers}{Host}||'mixi.jp').$path;
  while( $form =~ m{<input\s+(.*?)>}sg )
  {
    my $attrs = $1;
    my %attrs = $attrs =~ /(\w+)="(.*?)"/g;
    my $name  = $attrs{name} or next;
    my $value = $attrs{value};
    $name    = $ctx->_unescapeHTML($name);
    $value &&= $ctx->_unescapeHTML($value);
    if( $name eq 'email' )
    {
      $value = $ctx->_decode_value($this->{config}->mixi_user);
      if( !$value )
      {
        $ctx->_debug($req, "no mixi-user");
        return;
      }
    }
    if( $name eq 'password' )
    {
      $value = $ctx->_decode_value($this->{config}->mixi_pass);
      if( !$value )
      {
        $ctx->_debug($req, "no mixi-pass");
        return;
      }
    }
    defined($value) or next;
    # Percent-encode everything except word characters, '.' and '/'.
    $value =~ s{([^\w./])}{sprintf('%%%02x',unpack("C",$1))}ge;
    push(@post, "$name=$value");
  }
  if( @post )
  {
    $req->{result}{redirect} = {
      url           => $redir_url,
      method        => 'POST',
      content       => join('&', @post),
      max_redirects => 7,
    };
  }
}

# -----------------------------------------------------------------------------
# End of Module.
# -----------------------------------------------------------------------------
# NOTE(review): the tiarra-doc section below is kept verbatim because it is
# presumably consumed by Tiarra's configuration documentation tooling.
# -----------------------------------------------------------------------------
# End of File.
# -----------------------------------------------------------------------------
__END__

=encoding utf8

=for stopwords
  YAMASHINA
  Hio
  ACKNOWLEDGEMENTS
  AnnoCPAN
  CPAN
  RT

=begin tiarra-doc

info:    Mixiにログインして見出し抽出出来るようにするFetchTitleプラグイン.
default: off

# Auto::FetchTitle { ... } での設定.
#
# + Auto::FetchTitle {
#    mask: #* &mixi http://*
#    plugins {
#      Mixi {
#        mixi-user: xxx
#        mixi-pass: yyy
#      }
#    }
#    conf-mixi {
#      filter-mixi {
#        url: http://mixi.jp/*
#        url: http://news.mixi.jp/*
#        type: mixi
#        timeout: 10
#        #閲覧可能なコミュニティの指定.
#        #mixi-community: 0
#        #閲覧可能なユーザの指定.
#        #指定したユーザには足跡踏んで見に行きます.
#        #mixi-friend: 0
#        #閲覧可能にしていないページを表示したときのメッセージ.
#        #要求されたページを #(url) で展開できます.
#        #mixi-noperm-msg: not permitted #(url).
#      }
#    }
#  }
#
# アカウント情報は plugins Mixi に記述.
# mixi-pass には {B}bbbb でBASE64エンコード値も可能.
#
# newsだけしか使わない場合でも, ログイン処理が必要なので
# mixi.jp 内のいくつかのURLはこのプラグインで処理する必要があります.
#   url: http://news.mixi.jp/*
#   url: http://mixi.jp/issue_ticket.pl?*
#   url: http://mixi.jp/login.pl
#   url: http://mixi.jp/check.pl?*
# (それぞれ, ニュースページ, ログイン処理, エラー検出, 途中経路になります.)

=end tiarra-doc

=cut
