1package Hatena::Keyword; 2use strict; 3use warnings; 4use base qw(Class::Data::Inheritable Class::Accessor::Fast Class::ErrorHandler); 5use overload '""' => \&as_string, fallback => 1; 6use Carp; 7use URI; 8use RPC::XML; 9use RPC::XML::Client; 10 11our $VERSION = 0.05; 12 13my @Fields = qw(refcount word score cname); 14__PACKAGE__->mk_accessors(@Fields); 15__PACKAGE__->mk_classdata(rpc_client => RPC::XML::Client->new( 16 URI->new_abs('/xmlrpc', 'http://d.hatena.ne.jp/'), 17 useragent => [ agent => join('/', __PACKAGE__, __PACKAGE__->VERSION) ], 18)); 19 20sub extract { 21 my $class = shift; 22 my $body = shift or croak sprintf 'usage %s->extract($text)', $class; 23 my $args = shift || {}; 24 $args->{mode} = 'lite'; 25 my $res = $class->_call_rpc_with_cache($body, $args) 26 or $class->error($class->errstr); 27 my @keywords = map { $class->_instance_from_rpcdata($_) }@{$res->{wordlist}}; 28 return wantarray ? @keywords : \@keywords; 29} 30 31sub markup_as_html { 32 my $class = shift; 33 my $body = shift or croak sprintf 'usage %s->markup_as_html($text)', $class; 34 my $args = shift || {}; 35 $args->{mode} = ''; 36 my $res = $class->_call_rpc_with_cache($body, $args) 37 or $class->error($class->errstr); 38 return $res->value; 39} 40 41sub _call_rpc_with_cache { 42 my $class = shift; 43 my ($body, $args) = @_; 44 $body = pack('C0A*', $body); # hacking for utf-8 flag 45 my $cache = delete $args->{cache}; 46 return $class->_call_rpc($body, $args) unless ref($cache); 47 croak "cache object must have get and set method." 48 if not $cache->can('get') or not $cache->can('set'); 49 50 require Digest::MD5; 51 require Storable; 52 my $key = sprintf( 53 '%s-%s-%s', 54 $args->{mode} || '', 55 Digest::MD5::md5_hex($body), 56 Digest::MD5::md5_hex(Storable::freeze($args)), 57 ); 58 my $res = Storable::thaw($cache->get($key)); 59 unless (defined $res) { 60 $res = $class->_call_rpc($body, $args) 61 or return $class->error($class->errstr); 62 $cache->set($key =>Storable::freeze($res)); 63 } 64 $res; 65} 66 67sub _call_rpc { 68 my ($class, $body, $args) = @_; 69 my $params = { 70 body => RPC::XML::string->new($body), 71 score => RPC::XML::int->new($args->{score} || 0), 72 mode => RPC::XML::string->new($args->{mode} || ''), 73 cname => defined $args->{cname} ? RPC::XML::array->new( 74 map { RPC::XML::string->new($_) } @{$args->{cname}} 75 ) : undef, 76 a_target => RPC::XML::string->new($args->{a_target} || ''), 77 a_class => RPC::XML::string->new($args->{a_class} || ''), 78 }; 79 80 # For all categories, It doesn't need an undefined cname value. 81 delete $params->{cname} unless defined $params->{cname}; 82 83 my $res = $class->rpc_client->send_request( 84 RPC::XML::request->new('hatena.setkeywordlink', $params), 85 ); 86 return ref $res ? $res : $class->error(qq/RPC Error: "$res"/); 87} 88 89sub _instance_from_rpcdata { 90 my ($class, $data) = @_; 91 return $class->new({ 92 map {$_ => $data->{$_}->value } @Fields, 93 }); 94} 95 96sub jcode { 97 my $self = shift; 98 $self->{_jcode} and return $self->{_jcode}; 99 require Jcode; 100 return $self->{_jcode} = Jcode->new($self->as_string, 'utf8'); 101} 102 103sub as_string { $_[0]->word } 104 1051; 106 107__END__ 108 109=head1 NAME 110 111Hatena::Keyword - Extract Hatena Keywords in a string 112 113=head1 VERSION 114 115Version 0.03 116 117=head1 SYNOPSIS 118 119 use Hatena::Keyword; 120 121 @keywords = Hatena::Keyword->extract("Perl and Ruby and Python."); 122 print $_->score, "\t", $_ for @keywords; 123 124 $keywords = Hatena::Keyword->extract("Hello, Perl!", { 125 score => 20, 126 cname => [qw(hatena web book)], 127 }); 128 print $_->refcount, "\t", $_->jcode->euc for @$keywords; 129 130 my $cache = Cache::File->new( 131 cache_root => '/path/to/cache', 132 default_expires => '3600 sec', 133 ); 134 $keywords = Hatena::Keyword->extract("Hello, Hatena!", { 135 cache => $cache, 136 }); 137 138 $html = Hatena::Keyword->markup_as_html("Perl and Ruby"); 139 $html = Hatena::Keyword->markup_as_html("Hello, Perl!", { 140 score => 20, 141 cname => [qw(hatena web book)], 142 a_class => 'keyword', 143 a_target => '_blank', 144 }); 145 146=head1 DESCRIPTION 147 148This module allows you to extract Hatena keywords used in an 149arbitrary text and also allows you to mark up a text as HTML with 150the keywords. 151 152A Hatena keyword is an element in a suite of web sites *.hatena.ne.jp 153having blogs and social bookmarks among others. Please refer to 154http://d.hatena.ne.jp/keyword/ (in Japanese) for details. 155 156In Hatena Diary, a blog hosting service, a Hatena keyword found in a 157posting is linked to the keyword��s page automatically. You can 158implement the same kind of feature outside Hatena using this module. 159 160It queries Hatena Keyword Link API internally for retrieving terms. 161 162=head1 CLASS METHODS 163 164=head2 extract($text, \%options) 165 166Returns an array or an array reference which contains Hatena::Keyword 167objects extracted from specified text as first argument. 168 169This method works correctly for Japanese characters but their encoding 170must be utf-8. And also returned words are encoded as utf-8 string. 171 172Second argument is a option. Almost all key and values will be passed 173through to the XML-RPC API, excluding cache option. 174 175=head2 markup_as_html($text, \%options) 176 177Returns a tagged html string with Hatena Keywords like this: 178 179 <a href="http://d.hatena.ne.jp/keyword/Perl">Perl</a> and <a 180 href="http://d.hatena.ne.jp/keyword/Ruby">Ruby</a> 181 182It takes two arguments, same as C<extract()>. 183 184=head1 INSTANCE METHODS 185 186=head2 as_string 187 188Returns a Hatena::Keyword object to a plain string, an alias for 189C<word()>. Hatena::Keyword objects are also converted to plain strings 190automatically by overloading. This means that objects can be used as 191plain strings in most Perl constructs. 192 193=head2 word 194 195Returns a plain string of the word. 196 197=head2 score 198 199Returns a score of the word. 200 201=head2 refcount 202 203Returns a reference count of the word, which means used times of 204the term whole over the Hatena Diary. 205 206=head2 cname 207 208Returns a category name of the word. 209 210=head2 jcode 211 212Returns a Jcode objet which contains the word. 213 214=head1 CACHING 215 216Responses returned by Web API(XML-RPC) can be cached locally. 217C<extract> method and C<markup_as_html> accept a reference to a 218C<Cache> object as cache option. This means that you can pick out one 219of Cache's companions like C<Cache::Memory>, C<Cache::File>, etc. In 220fact, any other type of cache implementation will do as well, see the 221requirements below. 222 223 use Cache::File; 224 my $cache = Cache::File->new( 225 cache_root => '/tmp/mycache', 226 default_expires => '30 min', 227 ); 228 229 my $keywords = Hatena::Keyword->extract( 230 "Perl and Ruby", 231 { cache => $cache }, 232 ); 233 234C<Hatena::Keyword> uses I<positive> caching only, errors won't be 235cached. Erroneous requests will be sent to API server every 236time. 237 238Caching isn't limited to the C<Cache> class. Any cache object which 239adheres to the following interface can be used: 240 241 # Set a cache value 242 $cache->set($key, $value); 243 244 # Return a cached value, 'undef' if it doesn't exist 245 $cache->get($key); 246 247=head1 ACKNOWLEDGEMENTS 248 249Hideyo Imazu L<http://d.hatena.ne.jp/himazublog/> helped me writing 250the English documents. 251 252Hideyo and kosaki L<http://mkosaki.blog46.fc2.com/> and tsupo 253L<http://watcher.moe-nifty.com/> helped my decision to change the name 254of the method. 255 256Kazuhiro Osawa L<http://yappo.jp/> and Yuichi Tateno 257L<http://d.hatena.ne.jp/secondlife/> gave me an inspiration for 258caching implementation. 259 260=head1 AUTHOR 261 262Naoya Ito, C<< <naoya at bloghackers.net> >> 263 264=head1 BUGS 265 266Please report any bugs or feature requests to 267C<bug-hatena-keyword at rt.cpan.org>, or through the web interface at 268L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Hatena-Keyword>. 269I will be notified, and then you'll automatically be notified of progress on 270your bug as I make changes. 271 272=head1 SUPPORT 273 274You can find documentation for this module with the perldoc command. 275 276 perldoc Hatena::Keyword 277 278You can also look for information at: 279 280=over 4 281 282=item * AnnoCPAN: Annotated CPAN documentation 283 284L<http://annocpan.org/dist/Hatena-Keyword> 285 286=item * CPAN Ratings 287 288L<http://cpanratings.perl.org/d/Hatena-Keyword> 289 290=item * RT: CPAN's request tracker 291 292L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Hatena-Keyword> 293 294=item * Search CPAN 295 296L<http://search.cpan.org/dist/Hatena-Keyword> 297 298=back 299 300=head1 SEE ALSO 301 302=over 4 303 304=item Hatena Keyword Auto-Link API L<http://tinyurl.com/m5dkm> (redirect to 305d.hatena.ne.jp) 306 307=item Hatena Diary L<http://d.hatena.ne.jp/> 308 309=item Hatena L<http://www.hatena.ne.jp/> 310 311=back 312 313=head1 COPYRIGHT & LICENSE 314 315Copyright 2006 Naoya Ito, all rights reserved. 316 317This program is free software; you can redistribute it and/or modify it 318under the same terms as Perl itself. 319