1package Hatena::Keyword;
2use strict;
3use warnings;
4use base qw(Class::Data::Inheritable Class::Accessor::Fast Class::ErrorHandler);
5use overload '""' => \&as_string, fallback => 1;
6use Carp;
7use URI;
8use RPC::XML;
9use RPC::XML::Client;
10
11our $VERSION = 0.05;
12
13my @Fields = qw(refcount word score cname);
14__PACKAGE__->mk_accessors(@Fields);
15__PACKAGE__->mk_classdata(rpc_client => RPC::XML::Client->new(
16    URI->new_abs('/xmlrpc', 'http://d.hatena.ne.jp/'),
17    useragent => [ agent => join('/', __PACKAGE__, __PACKAGE__->VERSION) ],
18));
19
20sub extract {
21    my $class = shift;
22    my $body = shift or croak sprintf 'usage %s->extract($text)', $class;
23    my $args = shift || {};
24    $args->{mode} = 'lite';
25    my $res = $class->_call_rpc_with_cache($body, $args)
26        or $class->error($class->errstr);
27    my @keywords = map { $class->_instance_from_rpcdata($_) }@{$res->{wordlist}};
28    return wantarray ? @keywords : \@keywords;
29}
30
31sub markup_as_html {
32    my $class = shift;
33    my $body = shift or croak sprintf 'usage %s->markup_as_html($text)', $class;
34    my $args = shift || {};
35    $args->{mode} = '';
36    my $res = $class->_call_rpc_with_cache($body, $args)
37        or $class->error($class->errstr);
38    return $res->value;
39}
40
41sub _call_rpc_with_cache {
42    my $class = shift;
43    my ($body, $args) = @_;
44    $body = pack('C0A*', $body); # hacking for utf-8 flag
45    my $cache = delete $args->{cache};
46    return $class->_call_rpc($body, $args) unless ref($cache);
47    croak "cache object must have get and set method."
48        if not $cache->can('get') or not $cache->can('set');
49
50    require Digest::MD5;
51    require Storable;
52    my $key = sprintf(
53        '%s-%s-%s',
54        $args->{mode} || '',
55        Digest::MD5::md5_hex($body),
56        Digest::MD5::md5_hex(Storable::freeze($args)),
57    );
58    my $res = Storable::thaw($cache->get($key));
59    unless (defined $res) {
60        $res = $class->_call_rpc($body, $args)
61            or return $class->error($class->errstr);
62        $cache->set($key =>Storable::freeze($res));
63    }
64    $res;
65}
66
67sub _call_rpc {
68    my ($class, $body, $args) = @_;
69    my $params = {
70        body  => RPC::XML::string->new($body),
71        score => RPC::XML::int->new($args->{score} || 0),
72        mode  => RPC::XML::string->new($args->{mode} || ''),
73        cname => defined $args->{cname} ? RPC::XML::array->new(
74            map { RPC::XML::string->new($_) } @{$args->{cname}}
75        ) : undef,
76        a_target => RPC::XML::string->new($args->{a_target} || ''),
77        a_class  => RPC::XML::string->new($args->{a_class} || ''),
78    };
79
80    # For all categories, It doesn't need an undefined cname value.
81    delete $params->{cname} unless defined $params->{cname};
82
83    my $res = $class->rpc_client->send_request(
84        RPC::XML::request->new('hatena.setkeywordlink', $params),
85    );
86    return ref $res ? $res : $class->error(qq/RPC Error: "$res"/);
87}
88
89sub _instance_from_rpcdata {
90    my ($class, $data) = @_;
91    return $class->new({
92        map {$_ => $data->{$_}->value  } @Fields,
93    });
94}
95
96sub jcode {
97    my $self = shift;
98    $self->{_jcode} and return $self->{_jcode};
99    require Jcode;
100    return $self->{_jcode} = Jcode->new($self->as_string, 'utf8');
101}
102
103sub as_string { $_[0]->word }
104
1051;
106
107__END__
108
109=head1 NAME
110
111Hatena::Keyword - Extract Hatena Keywords in a string
112
113=head1 VERSION
114
115Version 0.03
116
117=head1 SYNOPSIS
118
119    use Hatena::Keyword;
120
121    @keywords = Hatena::Keyword->extract("Perl and Ruby and Python.");
122    print $_->score, "\t", $_ for @keywords;
123
124    $keywords = Hatena::Keyword->extract("Hello, Perl!", {
125        score => 20,
126        cname => [qw(hatena web book)],
127    });
128    print $_->refcount, "\t", $_->jcode->euc for @$keywords;
129
130    my $cache = Cache::File->new(
131        cache_root      => '/path/to/cache',
132        default_expires => '3600 sec',
133    );
134    $keywords = Hatena::Keyword->extract("Hello, Hatena!",  {
135        cache => $cache,
136    });
137
138    $html = Hatena::Keyword->markup_as_html("Perl and Ruby");
139    $html = Hatena::Keyword->markup_as_html("Hello, Perl!", {
140        score    => 20,
141        cname    => [qw(hatena web book)],
142        a_class  => 'keyword',
143        a_target => '_blank',
144    });
145
146=head1 DESCRIPTION
147
148This module allows you to extract Hatena keywords used in an
149arbitrary text and also allows you to mark up a text as HTML with
150the keywords.
151
152A Hatena keyword is an element in a suite of web sites *.hatena.ne.jp
153having blogs and social bookmarks among others. Please refer to
154http://d.hatena.ne.jp/keyword/ (in Japanese) for details.
155
156In Hatena Diary, a blog hosting service, a Hatena keyword found in a
157posting is linked to the keyword��s page automatically. You can
158implement the same kind of feature outside Hatena using this module.
159
160It queries Hatena Keyword Link API internally for retrieving terms.
161
162=head1 CLASS METHODS
163
164=head2 extract($text, \%options)
165
166Returns an array or an array reference which contains Hatena::Keyword
167objects extracted from specified text as first argument.
168
169This method works correctly for Japanese characters but their encoding
170must be utf-8. And also returned words are encoded as utf-8 string.
171
172Second argument is a option. Almost all key and values will be passed
173through to the XML-RPC API, excluding cache option.
174
175=head2 markup_as_html($text, \%options)
176
177Returns a tagged html string with Hatena Keywords like this:
178
179  <a href="http://d.hatena.ne.jp/keyword/Perl">Perl</a> and <a
180  href="http://d.hatena.ne.jp/keyword/Ruby">Ruby</a>
181
182It takes two arguments, same as C<extract()>.
183
184=head1 INSTANCE METHODS
185
186=head2 as_string
187
188Returns a Hatena::Keyword object to a plain string, an alias for
189C<word()>. Hatena::Keyword objects are also converted to plain strings
190automatically by overloading. This means that objects can be used as
191plain strings in most Perl constructs.
192
193=head2 word
194
195Returns a plain string of the word.
196
197=head2 score
198
199Returns a score of the word.
200
201=head2 refcount
202
203Returns a reference count of the word, which means used times of
204the term whole over the Hatena Diary.
205
206=head2 cname
207
208Returns a category name of the word.
209
210=head2 jcode
211
212Returns a Jcode objet which contains the word.
213
214=head1 CACHING
215
216Responses returned by Web API(XML-RPC) can be cached locally.
217C<extract> method and C<markup_as_html> accept a reference to a
218C<Cache> object as cache option. This means that you can pick out one
219of Cache's companions like C<Cache::Memory>, C<Cache::File>, etc. In
220fact, any other type of cache implementation will do as well, see the
221requirements below.
222
223    use Cache::File;
224    my $cache = Cache::File->new(
225        cache_root        => '/tmp/mycache',
226        default_expires   => '30 min',
227    );
228
229    my $keywords = Hatena::Keyword->extract(
230        "Perl and Ruby",
231        { cache => $cache },
232    );
233
234C<Hatena::Keyword> uses I<positive> caching only, errors won't be
235cached.  Erroneous requests will be sent to API server every
236time.
237
238Caching isn't limited to the C<Cache> class. Any cache object which
239adheres to the following interface can be used:
240
241    # Set a cache value
242    $cache->set($key, $value);
243
244    # Return a cached value, 'undef' if it doesn't exist
245    $cache->get($key);
246
247=head1 ACKNOWLEDGEMENTS
248
249Hideyo Imazu L<http://d.hatena.ne.jp/himazublog/> helped me writing
250the English documents.
251
252Hideyo and kosaki L<http://mkosaki.blog46.fc2.com/> and tsupo
253L<http://watcher.moe-nifty.com/> helped my decision to change the name
254of the method.
255
256Kazuhiro Osawa L<http://yappo.jp/> and Yuichi Tateno
257L<http://d.hatena.ne.jp/secondlife/> gave me an inspiration for
258caching implementation.
259
260=head1 AUTHOR
261
262Naoya Ito, C<< <naoya at bloghackers.net> >>
263
264=head1 BUGS
265
266Please report any bugs or feature requests to
267C<bug-hatena-keyword at rt.cpan.org>, or through the web interface at
268L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Hatena-Keyword>.
269I will be notified, and then you'll automatically be notified of progress on
270your bug as I make changes.
271
272=head1 SUPPORT
273
274You can find documentation for this module with the perldoc command.
275
276    perldoc Hatena::Keyword
277
278You can also look for information at:
279
280=over 4
281
282=item * AnnoCPAN: Annotated CPAN documentation
283
284L<http://annocpan.org/dist/Hatena-Keyword>
285
286=item * CPAN Ratings
287
288L<http://cpanratings.perl.org/d/Hatena-Keyword>
289
290=item * RT: CPAN's request tracker
291
292L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Hatena-Keyword>
293
294=item * Search CPAN
295
296L<http://search.cpan.org/dist/Hatena-Keyword>
297
298=back
299
300=head1 SEE ALSO
301
302=over 4
303
304=item Hatena Keyword Auto-Link API L<http://tinyurl.com/m5dkm> (redirect to
305d.hatena.ne.jp)
306
307=item Hatena Diary L<http://d.hatena.ne.jp/>
308
309=item Hatena L<http://www.hatena.ne.jp/>
310
311=back
312
313=head1 COPYRIGHT & LICENSE
314
315Copyright 2006 Naoya Ito, all rights reserved.
316
317This program is free software; you can redistribute it and/or modify it
318under the same terms as Perl itself.
319