1## ----------------------------------------------------------------------------
2#  Auto::FetchTitle::Plugin::Mixi.
3# -----------------------------------------------------------------------------
4# Mastering programmed by YAMASHINA Hio
5#
6# Copyright 2008 YAMASHINA Hio
7# -----------------------------------------------------------------------------
8# $Id: Mixi.pm 31645 2009-03-28 14:40:50Z hio $
9# -----------------------------------------------------------------------------
10package Auto::FetchTitle::Plugin::Mixi;
11use strict;
12use warnings;
13use base 'Auto::FetchTitle::Plugin';
14
15# mixi のコンテンツの読み込み.
16# 非公開な日記やコミュもあるので,
17# 設定で許可した箇所のみの取得を基本とする.
18#
19# 現時点で取得できるページ:
20# - ニュース
21# - コミュニティ
22#
23# 未対応のページ:
24# - マイミクページ
25# - 日記
# - アルバム, 動画, ミュージック等.
27
# Debug flag.  The glob assignment aliases this package's $DEBUG to
# $Auto::FetchTitle::DEBUG, so both names refer to the same scalar.
our $DEBUG;
*DEBUG = \$Auto::FetchTitle::DEBUG;

# Built-in "page not permitted" messages (English and Japanese).
# $MSG_NOPERM is the one not_permitted() falls back to when the config
# block supplies no message of its own; it starts out as the Japanese text.
our ($MSG_NOPERM_EN, $MSG_NOPERM_JA, $MSG_NOPERM);
$MSG_NOPERM_EN = "requested page in mixi is not permitted";
$MSG_NOPERM_JA = "mixi内の指定されたページは許可リストにありませんでした";
$MSG_NOPERM    = $MSG_NOPERM_JA;

# True value so that loading the module succeeds.
1;
36
37# -----------------------------------------------------------------------------
38# $pkg->new(\%config).
39#
sub new
{
  # Constructor.  All arguments are handed to the parent class
  # constructor unchanged; the resulting object additionally gets an
  # empty cookie jar (array ref) under the `cookie_jar` key.
  my ($class, @args) = @_;

  my $self = $class->SUPER::new(@args);
  $self->{cookie_jar} = [];

  return $self;
}
47
48# -----------------------------------------------------------------------------
49# $obj->register($context).
50#
sub register
{
  # Register this plugin's hooks with the given context under the
  # name 'mixi': filter_prereq for 'filter.prereq' and filter_response
  # for 'filter.response'.
  my ($self, $context) = @_;

  my %hooks = (
    name              => 'mixi',
    'filter.prereq'   => \&filter_prereq,
    'filter.response' => \&filter_response,
  );

  $context->register_hook($self, \%hooks);
}
62
63# -----------------------------------------------------------------------------
64# $this->not_permitted($ctx, $req, $block).
65#
sub not_permitted
{
  # Report that the requested page is not on the allow list.
  #
  # The message comes from $block->mixi_noperm_msg, falling back to
  # $MSG_NOPERM.  A leading "word:" prefix is stripped and used as the
  # message type; without a prefix the type is 'noerror'.  The text is
  # then passed through Tools::HashTools::replace_recursive together with
  # the request URL.
  #
  # Where the message is stored depends on whether a response exists yet:
  #   - no response (prereq stage): $req->{response} becomes a synthetic
  #     response for type 'noerror', or the plain text otherwise (a scalar
  #     response means an error message);
  #   - response present: $req->{result}{result} is set to the text,
  #     prefixed with an error marker unless the type is 'noerror'.
  my ($self, $ctx, $req, $block) = @_;

  my $message = $block->mixi_noperm_msg || $MSG_NOPERM;

  my $kind = 'noerror';
  if( $message =~ s/^(\w+):\s*// )
  {
    $kind = $1;
  }
  my $is_error = $kind ne 'noerror';

  $message = Tools::HashTools::replace_recursive(
    $message, [{url => $req->{url}}]
  );

  if( $req->{response} )
  {
    $req->{result}{result} = $is_error ? "(エラー) $message" : $message;
  }
  else
  {
    # prereq stage.
    $req->{response} = $is_error
      ? $message
      : $self->response_noerror($message);
  }
}
100
101# -----------------------------------------------------------------------------
102# $res = $this->response_noerror($msg).
103#
sub response_noerror
{
  # Build a synthetic, already-finished HTTP 200 response hash whose
  # HTML body consists of a single <title> element holding the
  # HTML-escaped message.
  my ($self, $msg) = @_;

  my $body = '<title>' . $self->_escapeHTML($msg) . "</title>\n";

  my %header = (
    'Content-Type'   => 'text/html; charset=utf-8',
    'Content-Length' => length($body),
  );

  my %response = (
    Protocol    => 'HTTP/1.0',
    Code        => 200,
    Message     => 'Success',
    Header      => \%header,
    Content     => $body,
    StreamState => 'finished',
  );

  return \%response;
}
125
sub _escapeHTML
{
  # Return a copy of the text with the five HTML-special characters
  # (& < > " ') replaced by their entities.
  my ($self, $text) = @_;

  my %entity_for = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    '"' => '&quot;',
    "'" => '&#39;',
  );

  # One pass is enough: a replaced character is never scanned again,
  # so the '&' of a freshly inserted entity is not escaped twice.
  $text =~ s/([&<>"'])/$entity_for{$1}/g;

  return $text;
}
137
138# -----------------------------------------------------------------------------
139# $this->detect_page($ctx, $req, $block).
140# 取得できるページか確認.
141#
sub detect_page
{
  # Decide whether $req->{url} is a mixi page this plugin may fetch.
  #
  # Arguments:
  #   $ctx   - context object; only its _debug method is used here.
  #   $req   - request hash; $req->{url} is matched against the table.
  #   $block - config block; for a key K in a page's `keys` list the
  #            method mixi_K('all') is called and its return values are
  #            parsed as lists of allowed ids.
  #
  # Returns the matching page entry (hash ref, with the captured values
  # stored under `values`) when the URL is recognized and every id it
  # names is allowed.  Returns an empty list as soon as the first entry
  # whose pattern matches names an id that is not allowed, and undef when
  # no entry matches the URL at all.
  my $this  = shift;
  my $ctx   = shift;
  my $req   = shift;
  my $block = shift;

  $DEBUG and $ctx->_debug($req, __PACKAGE__."#detect_page, $req->{url}.");

  # Table of recognized pages.
  #   name     - label used in debug output and by callers.
  #   can_show - not read in this sub.
  #   re       - pattern the whole URL must match.
  #   keys     - one entry per capture group of `re`, in order.  A key
  #              starting with a word character (e.g. 'friend') is checked
  #              against the config value mixi_<key>; '' and keys such as
  #              '-albumid' are not checked.
  # All literal dots and question marks in the patterns are escaped so
  # that only the exact host and script names match.
  my @allow_pages = (
    {
      name     => 'login',
      can_show => 0,
      re       => qr{^\Qhttp://mixi.jp/login.pl\E\z},
    },
    {
      name     => 'news-login',
      can_show => 0,
      re       => qr{^\Qhttp://mixi.jp/issue_ticket.pl?},
    },
    {
      name     => 'login-check',
      can_show => 0,
      re       => qr{^\Qhttp://mixi.jp/check.pl?},
    },
    {
      name     => 'news',
      can_show => 1,
      re       => qr{^\Qhttp://news.mixi.jp/view_news.pl?},
    },
    {
      name     => 'news-list-media',
      can_show => 1,
      re       => qr{^\Qhttp://news.mixi.jp/list_news_media.pl?},
    },
    {
      name     => 'news-list-category',
      can_show => 1,
      re       => qr{^\Qhttp://news.mixi.jp/list_news_category.pl?},
    },
    {
      name     => 'community-top',
      can_show => 1,
      re       => qr{^\Qhttp://mixi.jp/view_community.pl?id=\E(\d+)\z},
      keys     => ['community'],
    },
    {
      name     => 'community-bbs-list',
      can_show => 1,
      re       => qr{^http://mixi\.jp/list_bbs\.pl\?id=(\d+)&type=(?:bbs|event|enquete)\z},
      keys     => ['community'],
    },
    {
      name     => 'community-bbs-show',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_(bbs|event|enquete)\.pl\?(?:page=\d+&)?id=\d+&(?:comment_count=\d+&)?comm_id=(\d+)(?:&page=all)?\z},
      keys     => ['','community'],
    },
    {
      name     => 'friend',
      can_show => 1,
      re       => qr{^http://mixi\.jp/show_friend\.pl\?id=(\d+)\z},
      keys     => ['friend'],
    },

    # album.
    {
      name     => 'friend-album-list',
      can_show => 1,
      re       => qr{^http://mixi\.jp/list_album\.pl\?id=(\d+)(?:&from=navi)?\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-album-photolist',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_album\.pl\?id=(\d+)&owner_id=(\d+)&mode=(?:photo|comment)\z},
      keys     => ['-albumid', 'friend'],
    },
    {
      name     => 'friend-album-photo',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_album_photo\.pl\?album_id=(\d+)&owner_id=(\d+)&number=(\d+)(?:&page=(\d+))?\z},
      keys     => ['-albumid', 'friend', '-photoid', '-page'],
    },

    # obsolete?
    {
      name     => 'friend-list-diary/album/review/comment',
      can_show => 1,
      re       => qr{^http://mixi\.jp/list_(?:diary|album|review|comment)\.pl\?(?:page=\d+&)?id=(\d+)(?:&year=\d+&month=\d+(?:&day=\d+)?)?\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-list-video/music',
      can_show => 1,
      re       => qr{^http://(video|music)\.mixi\.jp/list_\1\.pl\?id=(\d+)\z},
      keys     => ['', 'friend'],
    },
    {
      name     => 'friend-diary',
      can_show => 1,
      re       => qr{^http://mixi\.jp/view_diary\.pl\?id=\d+&owner_id=(\d+)\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-video',
      can_show => 1,
      re       => qr{^http://video\.mixi\.jp/view_video\.pl\?owner_id=(\d+)&video_id=\d+\z},
      keys     => ['friend'],
    },
    {
      name     => 'friend-review',
      can_show => 1,
      # The '?' must be escaped: unescaped it would make the preceding
      # 'l' optional and no real review URL could ever match.
      re       => qr{^http://mixi\.jp/view_item\.pl\?reviewer_id=(\d+)&id=\d+\z},
      keys     => ['friend'],
    },
  );

  foreach my $page (@allow_pages)
  {
    $DEBUG and $ctx->_debug($req, "- check $page->{name}.");

    # In list context a successful match yields one value per capture
    # group (or the single value 1 when the pattern has no groups) and a
    # failed match yields an empty list, so comparing the counts tells
    # match from no-match.
    my $values = [$req->{url} =~ $page->{re}];
    my $keys = $page->{keys} || [''];
    if( @$values != @$keys)
    {
      $DEBUG and $ctx->_debug($req, "- - not match.");
      next;
    }

    foreach my $idx (0..$#$keys)
    {
      my $key = $keys->[$idx];
      $key or next;
      $key =~ /^\w/ or next;   # keys like '-albumid' are informational only.
      my $val = $values->[$idx];
      my $conf_key = "mixi_$key";
      my $allowed;
      foreach my $_conf_val ($block->$conf_key('all'))
      {
        my $conf_val = $_conf_val; # copy (unalias).
        $conf_val =~ s/\s*#.*//s;  # drop a trailing "# comment".
        $conf_val or next;
        # Items are separated by commas and/or whitespace; '*' allows
        # every id, a number (leading zeros ignored) allows that id.
        foreach my $item (split(/[,\s]+/, $conf_val))
        {
          $item or next;
          $item eq '*' and $allowed=1, last;
          $item =~ /^0*(\d+)\z/ or next;
          $1 == $val and $allowed=1, last;
        }
        $allowed and last;
      }
      if( !$allowed )
      {
        $DEBUG and $ctx->_debug($req, "- - not match / mixi-$key = $val");
        return;
      }
    }
    $DEBUG and $ctx->_debug($req, "- - match.");
    $page->{values} = $values;
    return $page;
  }
  return undef;
}
304
305# -----------------------------------------------------------------------------
306# $this->filter_prereq($ctx, $arg).
307# (impl:fetchtitle-filter)
308# mixi/prereq.
309#
sub filter_prereq
{
  # 'filter.prereq' hook, run before a request is sent.
  #
  # $arg is a hash ref carrying the request under `req` and the config
  # block under `block`.  The chain of earlier requests ($req->{old}) is
  # walked to count visits to the login and ticket pages; the page is
  # then checked with detect_page.  A page that is not permitted is
  # handed to not_permitted and nothing else happens.  Otherwise a
  # receive limit is applied and the stored cookies are added.
  my ($self, $ctx, $arg) = @_;
  my ($req, $block) = @{$arg}{qw(req block)};

  $DEBUG and $ctx->_debug($req, "- mixi.check multiple login pages.");

  my ($login_hits, $ticket_hits) = (0, 0);
  for( my $hop = $req; $hop; $hop = $hop->{old} )
  {
    my $url = $hop->{url};
    if( $url eq 'http://mixi.jp/login.pl' )
    {
      ++$login_hits;
      $DEBUG and $ctx->_debug($req, "- login-page: $url");
    }
    elsif( $url =~ m{^\Qhttp://mixi.jp/issue_ticket.pl?\E} )
    {
      ++$ticket_hits;
      $DEBUG and $ctx->_debug($req, "- news-login-page: $url");
    }
    else
    {
      $DEBUG and $ctx->_debug($req, "- normal-page: $url");
    }
  }

  if( $login_hits >= 2 || $ticket_hits >= 3 )
  {
    # The abort for repeated login pages is commented out, so this
    # branch currently has no effect.
    my $msg = "login pages (login=$login_hits,news_login=$ticket_hits)";
    #$ctx->_debug($req, $msg);
    #$req->{response} = "mixi multiple login pages (login=$login_hits,news_login=$ticket_hits)";
    #return;
  }

  unless( $self->detect_page($ctx, $req, $block) )
  {
    $self->not_permitted($ctx, $req, $block);
    return;
  }

  # 1000 KiB receive limit for permitted pages.
  $ctx->_apply_recv_limit($req, 1000*1024);

  $ctx->_add_cookie_header($req, $self->{cookie_jar});
}
357
358# -----------------------------------------------------------------------------
359# $this->filter_response($ctx, $arg).
360# (impl:fetchtitle-filter)
361# mixi/response.
362#
sub filter_response
{
  # 'filter.response' hook, run after a response has been received.
  #
  # $arg is a hash ref carrying the request under `req` and the config
  # block under `block`.  Steps, in order:
  #   1. merge freshly parsed cookies into the cookie jar;
  #   2. stop if the response is not a reference;
  #   3. if the page contains an errorArea block, prepend its text to
  #      the result and stop;
  #   4. if the page contains the login form, start the login;
  #   5. otherwise re-check the page and report it if not permitted.
  my ($self, $ctx, $arg) = @_;
  my $req   = $arg->{req};
  my $block = $arg->{block};

  my $new_cookies = $req->{parsed_cookies};
  $new_cookies and $ctx->_merge_cookies($self->{cookie_jar}, $new_cookies);

  unless( ref($req->{response}) )
  {
    $DEBUG and $ctx->_debug($req, "debug: - - skip/not ref");
    return;
  }

  my $result = $req->{result};

  if( $result->{decoded_content} =~ m{<div id="errorArea">(.*?)</div>}s )
  {
    my $captured   = $1;
    my $error_text = $ctx->_fixup_title($captured);
    $req->{result}{result} = $error_text . ' - ' . $req->{result}{result};
    return;
  }

  if( my ($path, $form) = $result->{decoded_content} =~ m{<form action="(/login.pl)" method="post" name="login_form">(.*)</form>}s )
  {
    $DEBUG and $ctx->_debug($req, __PACKAGE__."#_filter_mixi_response, login form found ($path)");
    $self->_do_login($ctx, $req, $block, $form, $path);
  }
  else
  {
    my $page = $self->detect_page($ctx, $req, $block);
    if( !$page )
    {
      $self->not_permitted($ctx, $req, $block);
      return;
    }
  }
}
408
sub _do_login
{
  # Turn the login form found in a response into a POST redirect.
  #
  # Arguments: context, request hash, config block (not used here), the
  # HTML between the <form> tags, and the form's action path.
  #
  # Every <input> with a name contributes a "name=value" pair.  The
  # `email` and `password` fields take their values from the plugin
  # config (mixi_user / mixi_pass, passed through $ctx->_decode_value);
  # if either is empty a debug message is emitted and no redirect is set.
  # Inputs without a value attribute are skipped.  When at least one pair
  # was collected, $req->{result}{redirect} is set to a POST to the same
  # host (Host header, default mixi.jp) with up to 7 redirects.
  my ($self, $ctx, $req, $block, $form, $path) = @_;

  my $redir_url = 'http://'.($req->{headers}{Host}||'mixi.jp').$path;

  # Form field name => [config accessor, debug message when it is empty].
  my %credential_for = (
    email    => ['mixi_user', "no mixi-user"],
    password => ['mixi_pass', "no mixi-pass"],
  );

  my @input_tags = $form =~ m{<input\s+(.*?)>}sg;
  my @pairs;
  foreach my $tag (@input_tags)
  {
    my %attr_of = $tag =~ /(\w+)="(.*?)"/g;
    my $name    = $attr_of{name} or next;
    my $value   = $attr_of{value};

    $name = $ctx->_unescapeHTML($name);
    if( $value )
    {
      $value = $ctx->_unescapeHTML($value);
    }

    if( my $credential = $credential_for{$name} )
    {
      my ($accessor, $complaint) = @$credential;
      $value = $ctx->_decode_value($self->{config}->$accessor);
      if( !$value )
      {
        $ctx->_debug($req, $complaint);
        return;
      }
    }

    defined($value) or next;

    # Percent-encode everything except word characters, '.' and '/'.
    $value =~ s{([^\w./])}{sprintf('%%%02x',unpack("C",$1))}ge;
    push(@pairs, "$name=$value");
  }

  if( @pairs )
  {
    $req->{result}{redirect} = {
      url           => $redir_url,
      method        => 'POST',
      content       => join('&', @pairs),
      max_redirects => 7,
    };
  }
}
460
461# -----------------------------------------------------------------------------
462# End of Module.
463# -----------------------------------------------------------------------------
464# -----------------------------------------------------------------------------
465# End of File.
466# -----------------------------------------------------------------------------
467__END__
468
469=encoding utf8
470
471=for stopwords
472	YAMASHINA
473	Hio
474	ACKNOWLEDGEMENTS
475	AnnoCPAN
476	CPAN
477	RT
478
479=begin tiarra-doc
480
481info:    Mixiにログインして見出し抽出出来るようにするFetchTitleプラグイン.
482default: off
483
484# Auto::FetchTitle { ... } での設定.
485#
486# + Auto::FetchTitle {
487#    mask: #* &mixi http://*
488#    plugins {
489#      Mixi {
490#        mixi-user: xxx
491#        mixi-pass: yyy
492#      }
493#    }
494#    conf-mixi {
495#      filter-mixi {
496#        url: http://mixi.jp/*
497#        url: http://news.mixi.jp/*
498#        type: mixi
499#        timeout: 10
500#        #閲覧可能なコミュニティの指定.
501#        #mixi-community: 0
502#        #閲覧可能なユーザの指定.
503#        #指定したユーザには足跡踏んで見に行きます.
504#        #mixi-friend:    0
505#        #閲覧可能にしていないページを表示したときのメッセージ.
506#        #要求されたページを #(url) で展開できます.
507#        #mixi-noperm-msg: not permitted #(url).
508#      }
509#    }
510#  }
511#
512# アカウント情報は plugins Mixi に記述.
513# mixi-pass には {B}bbbb でBASE64エンコード値も可能.
514#
515# newsだけしか使わない場合でも, ログイン処理が必要なので
516# mixi.jp 内のいくつかのURLはこのプラグインで処理する必要があります.
517#   url: http://news.mixi.jp/*
518#   url: http://mixi.jp/issue_ticket.pl?*
519#   url: http://mixi.jp/login.pl
520#   url: http://mixi.jp/check.pl?*
521# (それぞれ, ニュースページ, ログイン処理, エラー検出, 途中経路になります.)
522
523=end tiarra-doc
524
525=cut
526
527