1package WWW::TinySong;
2
3=head1 NAME
4
5WWW::TinySong - Get free music links from tinysong.com
6
7=head1 SYNOPSIS
8
9  # basic use
10
11  use WWW::TinySong qw(search);
12
13  for(search("we are the champions")) {
14      printf("%s", $_->{songName});
15      printf(" by %s", $_->{artistName});
16      printf(" on %s", $_->{albumName}) if $_->{albumName};
17      printf(" <%s>\n", $_->{tinysongLink});
18  }
19
20  # customize the user agent
21
22  use LWP::UserAgent;
23
24  my $ua = new LWP::UserAgent;
25  $ua->timeout(10);
26  $ua->env_proxy;
27
28  WWW::TinySong->ua($ua);
29
30  # customize the service
31
32  WWW::TinySong->service('http://tinysong.com/');
33
34  # tolerate some server errors
35
36  WWW::TinySong->retries(5);
37
38=head1 DESCRIPTION
39
40tinysong.com is a web app that can be queried for a song and returns a tiny
41URL, allowing you to listen to the song for free online and share it with
42friends.  L<WWW::TinySong> is a Perl interface to this service, allowing you
43to programmatically search its underlying database.
44
45=cut
46
47use 5.006;
48use strict;
49use warnings;
50
51use Carp;
52use Exporter;
53use CGI;
54use HTML::Parser;
55
56our @EXPORT_OK = qw(link search);
57our @ISA       = qw(Exporter);
58our $VERSION   = '1.01';
59
60my($ua, $service, $retries);
61
62=head1 FUNCTIONS
63
64The do-it-all function is C<search>.  If you just want a tiny URL, use C<link>.
65These two functions may be C<import>ed and used like any other function.
66C<call> and C<parse> are provided so that you can (hopefully) continue to use
67this module if the tinysong.com API is extended and I'm too lazy or busy to
68update, but you will probably not need to use them otherwise.  The other public
69functions are either aliases for one of the above or created to allow the
70customization of requests issued by this module.
71
72=over 4
73
74=item link( $SEARCH_TERMS )
75
76=item WWW::TinySong->link( $SEARCH_TERMS )
77
78=cut
79
80sub link {
81    unshift @_, __PACKAGE__ # add the package name unless already there
82        unless defined($_[0]) && UNIVERSAL::isa($_[0], __PACKAGE__);
83    return shift->a(@_);
84}
85
86=item WWW::TinySong->a( $SEARCH_TERMS )
87
88Returns the short URL corresponding to the top result of searching with the
89specified song and artist name terms or C<undef> if no song was found.
90
91=cut
92
93sub a {
94    my($pkg, $search_terms) = @_;
95    my $ret = $pkg->call('a', $search_terms);
96    $ret =~ s/\s+//g;
97    return $ret =~ /^NSF;?$/ ? undef : $ret;
98}
99
100=item search( $SEARCH_TERMS [, $LIMIT ] )
101
102=item WWW::TinySong->search( $SEARCH_TERMS [, $LIMIT ] )
103
104=cut
105
106sub search {
107    unshift @_, __PACKAGE__ # add the package name unless already there
108        unless defined($_[0]) && UNIVERSAL::isa($_[0], __PACKAGE__);
109    return shift->s(@_);
110}
111
112=item WWW::TinySong->s( $SEARCH_TERMS [, $LIMIT ] )
113
114Searches for the specified song and artist name terms, giving up to $LIMIT
115results.  $LIMIT defaults to 10 if not C<defined>.  Returns an array in list
116context or the top result in scalar context.  Return elements are hashrefs with
117keys C<qw(tinysongLink songID songName artistID artistName albumID albumName
118groovesharkLink)> as given by C<parse>.  Here's a quick script to demonstrate:
119
120  #!/usr/bin/perl
121
122  use WWW::TinySong qw(search);
123  use Data::Dumper;
124
125  print Dumper search("three little birds", 3);
126
127...and its output on my system at the time of this writing:
128
129  $VAR1 = {
130            'artistName' => 'Bob Marley',
131            'albumName' => 'Legend',
132            'songName' => 'Three Little Birds',
133            'artistID' => '139',
134            'tinysongLink' => 'http://tinysong.com/eg9',
135            'songID' => '1302',
136            'albumID' => '97291',
137            'groovesharkLink' => 'http://listen.grooveshark.com/song/Three_Little_Birds/1302'
138          };
139  $VAR2 = {
140            'artistName' => 'Bob Marley',
141            'albumName' => 'One Love: The Very Best Of Bob Marley & The Wailers',
142            'songName' => 'Three Little Birds',
143            'artistID' => '139',
144            'tinysongLink' => 'http://tinysong.com/lf2',
145            'songID' => '3928811',
146            'albumID' => '221021',
147            'groovesharkLink' => 'http://listen.grooveshark.com/song/Three_Little_Birds/3928811'
148          };
149  $VAR3 = {
150            'artistName' => 'Bob Marley & The Wailers',
151            'albumName' => 'Exodus',
152            'songName' => 'Three Little Birds',
153            'artistID' => '848',
154            'tinysongLink' => 'http://tinysong.com/egc',
155            'songID' => '3700',
156            'albumID' => '2397306',
157            'groovesharkLink' => 'http://listen.grooveshark.com/song/Three_Little_Birds/3700'
158          };
159
160=cut
161
162sub s {
163    my($pkg, $search_terms, $limit) = @_;
164
165    if(wantarray) {
166        $limit = 10 unless defined $limit;
167    }
168    else {
169        $limit = 1; # no point in searching for more if only one is needed
170    }
171
172    my @ret = $pkg->parse($pkg->call('s', $search_terms,
173        {limit => $limit}));
174
175    return wantarray ? @ret : $ret[0];
176}
177
178=item WWW::TinySong->b( $SEARCH_TERMS )
179
180Searches for the specified song and artist name terms, giving the top result.
181I'm not really sure why this is part of the API because the same result can be
182obtained by limiting a C<search> to one result, but it's included here for
183completeness.
184
185=cut
186
187sub b {
188    my($pkg, $search_terms) = @_;
189    return ($pkg->parse($pkg->call('b', $search_terms)))[0];
190}
191
192=item WWW::TinySong->call( $METHOD , $SEARCH_TERMS [, \%EXTRA_PARAMS ] )
193
194Calls API "method" $METHOD using the specified $SEARCH_TERMS and optional
195hashref of extra parameters.  Whitespace sequences in $SEARCH_TERMS will be
196converted to pluses.  Returns the entire response as a string.  Unless you're
197just grabbing a link, you will probably want to pass the result through
198C<parse>.
199
200=cut
201
202sub call {
203    my($pkg, $method, $search_terms, $param) = @_;
204    croak 'Empty method not allowed' unless length($method);
205
206    $search_terms =~ s/[\s\+]+/+/g;
207    $search_terms =~ s/^\+//;
208    $search_terms =~ s/\+$//;
209    croak 'Empty search terms not allowed' unless length($search_terms);
210    my $url = join('/', $pkg->service, CGI::escape($method), $search_terms);
211
212    $param ||= {};
213    $param = join('&', map
214        { sprintf('%s=%s', CGI::escape($_), CGI::escape($param->{$_})) }
215        keys %$param);
216    $url .= "?$param" if $param;
217
218    return $pkg->_get($url);
219}
220
221=item WWW::TinySong->parse( [ @RESULTS ] )
222
223Parses all the lines in the given list of results according to the specs,
224building and returning a (possibly empty) list of hashrefs with the keys
225C<qw(tinysongLink songID songName artistID artistName albumID albumName
226groovesharkLink)>, whose meanings are hopefully self-explanatory.
227
228=cut
229
230sub parse {
231    my $pkg = shift;
232    return map {
233        /^(http:\/\/.*); (\d*); (.*); (\d*); (.*); (\d*); (.*); (http:\/\/.*)$/
234            or croak 'Result in unexpected format';
235        {
236            tinysongLink    => $1,
237            songID          => $2,
238            songName        => $3,
239            artistID        => $4,
240            artistName      => $5,
241            albumID         => $6,
242            albumName       => $7,
243            groovesharkLink => $8,
244        }
245    } grep { !/^NSF;\s*$/ } map {chomp; split(/\n/, $_)} @_;
246}
247
248=item WWW::TinySong->scrape( $QUERY_STRING [, $LIMIT ] )
249
250Searches for $QUERY_STRING by scraping, giving up to $LIMIT results.  $LIMIT
251defaults to 10 if not C<defined>.  Returns an array in list context or the
252top result in scalar context.  Return elements are hashrefs with keys
253C<qw(albumName artistName songName tinysongLink)>.  Their values will be the
254empty string if not given by the website.  As an example, executing:
255
256  #!/usr/bin/perl
257
258  use WWW::TinySong;
259  use Data::Dumper;
260
261  print Dumper(WWW::TinySong->scrape("we can work it out", 3));
262
263...prints something like:
264
265  $VAR1 = {
266            'artistName' => 'The Beatles',
267            'tinysongLink' => 'http://tinysong.com/5Ym',
268            'songName' => 'We Can Work It Out',
269            'albumName' => 'The Beatles 1'
270          };
271  $VAR2 = {
272            'artistName' => 'The Beatles',
273            'tinysongLink' => 'http://tinysong.com/uLd',
274            'songName' => 'We Can Work It Out',
275            'albumName' => 'We Can Work It Out / Day Tripper'
276          };
277  $VAR3 = {
278            'artistName' => 'The Beatles',
279            'tinysongLink' => 'http://tinysong.com/2EaX',
280            'songName' => 'We Can Work It Out',
281            'albumName' => 'The Beatles 1967-70'
282          };
283
284This function is how the primary functionality of the module was implemented in
285the 0.0x series.  It remains here as a tribute to the past, but should be
286avoided because scraping depends on the details of the response HTML, which may
287change at any time (and in fact did at some point between versions 0.05 and
2880.06).  Interestingly, this function does currently have one advantage over the
289robust alternative: whereas C<search> is limited to a maximum of 32 results by
290the web service, scraping doesn't seem to be subjected to this requirement.
291
292=cut
293
294sub scrape {
295    my($pkg, $query_string, $limit) = @_;
296    if(wantarray) {
297        $limit = 10 unless defined $limit;
298    }
299    else {
300        $limit = 1; # no point in searching for more if only one is needed
301    }
302
303    my $service = $pkg->service;
304
305    my $response = $pkg->_get(sprintf('%s?s=%s&limit=%d', $service,
306        CGI::escape($query_string), $limit));
307
308    my @ret           = ();
309    my $inside_list   = 0;
310    my $current_class = undef;
311
312    my $start_h = sub {
313        my $tagname = lc(shift);
314        my $attr    = shift;
315        if(    $tagname eq 'ul'
316            && defined($attr->{id})
317            && lc($attr->{id}) eq 'results')
318        {
319            $inside_list = 1;
320        }
321        elsif($inside_list) {
322            if($tagname eq 'span') {
323                my $class = $attr->{class};
324                if(    defined($class)
325                    && $class =~ /^(?:album|artist|song title)$/i) {
326                    $current_class = lc $class;
327                    croak 'Unexpected results while parsing HTML'
328                        if !@ret || defined($ret[$#ret]->{$current_class});
329                }
330            }
331            elsif($tagname eq 'a' && $attr->{class} eq 'link') {
332                my $href = $attr->{href};
333                croak 'Bad song link' unless defined $href;
334                croak 'Song link doesn\'t seem to match service'
335                    unless substr($href, 0, length($service)) eq $service;
336                push @ret, {tinysongLink => $href};
337            }
338        }
339    };
340
341    my $text_h = sub {
342        return unless $inside_list && $current_class;
343        my $text = shift;
344        $ret[$#ret]->{$current_class} = $text;
345        undef $current_class;
346    };
347
348    my $end_h = sub {
349        return unless $inside_list;
350        my $tagname = lc(shift);
351        if($tagname eq 'ul') {
352            $inside_list = 0;
353        }
354        elsif($tagname eq 'span') {
355            undef $current_class;
356        }
357    };
358
359    my $parser = HTML::Parser->new(
360        api_version     => 3,
361        start_h         => [$start_h, 'tagname, attr'],
362        text_h          => [$text_h, 'text'],
363        end_h           => [$end_h, 'tagname'],
364        marked_sections => 1,
365    );
366    $parser->parse($response);
367    $parser->eof;
368
369    for my $res (@ret) {
370    	$res = {
371    	    albumName    => $res->{album} || '',
372    	    artistName   => $res->{artist} || '',
373    	    songName     => $res->{'song title'} || '',
374    	    tinysongLink => $res->{tinysongLink} || '',
375    	};
376        $res->{albumName}  =~ s/^\s+on\s//;
377        $res->{artistName} =~ s/^\s+by\s//;
378    }
379
380    return wantarray ? @ret : $ret[0];
381}
382
383=item WWW::TinySong->ua( [ $USER_AGENT ] )
384
385Returns the user agent object used by this module for web retrievals, first
386setting it to $USER_AGENT if it's specified.  Defaults to a C<new>
387L<LWP::UserAgent>.  If you explicitly set this, you don't have to use a
388LWP::UserAgent, it may be anything that can C<get> a URL and return a
389response object.
390
391=cut
392
393sub ua {
394    if($_[1]) {
395        $ua = $_[1];
396    }
397    elsif(!$ua) {
398        eval {
399            require LWP::UserAgent;
400            $ua = new LWP::UserAgent;
401        };
402        carp 'Problem setting user agent' if $@;
403    }
404    return $ua;
405}
406
407=item WWW::TinySong->service( [ $URL ] )
408
409Returns the web address of the service used by this module, first setting
410it to $URL if it's specified.  Defaults to <http://tinysong.com/>.
411
412=cut
413
414sub service {
415    return $service = $_[1] ? $_[1] : $service || 'http://tinysong.com/';
416}
417
418=item WWW::TinySong->retries( [ $COUNT ] )
419
420Returns the number of consecutive internal server errors the module will ignore
421before failing, first setting it to $COUNT if it's specified.  Defaults to 0
422(croak, do not retry in case of internal server error).  This was created
423because read timeouts seem to be a common problem with the web service.  The
424module now provides the option of doing something more useful than immediately
425failing.
426
427=cut
428
429sub retries {
430    return $retries = $_[1] ? $_[1] : $retries || 0;
431}
432
433=back
434
435=cut
436
437################################################################################
438
439sub _get {
440    my($response, $pkg, $url) = (undef, @_);
441    for(0..$pkg->retries) {
442        $response = $pkg->ua->get($url);
443        last if $response->is_success;
444        croak $response->message || $response->status_line
445            if $response->is_error && $response->code != 500;
446    }
447    return $response->decoded_content || $response->content;
448}
449
4501;
451
452__END__
453
454=head1 BE NICE TO THE SERVERS
455
456Please don't abuse the tinysong.com web service.  If you anticipate making
457a large number of requests, don't make them too frequently.  There are
458several CPAN modules that can help you make sure your code is nice.  Try,
459for example, L<LWP::RobotUA> as the user agent:
460
461  use WWW::TinySong qw(search link);
462  use LWP::RobotUA;
463
464  my $ua = LWP::RobotUA->new('my-nice-robot/0.1', 'me@example.org');
465
466  WWW::TinySong->ua($ua);
467
468  # search() and link() should now be well-behaved
469
470=head1 SEE ALSO
471
472L<http://tinysong.com/>, L<LWP::UserAgent>, L<LWP::RobotUA>
473
474=head1 BUGS
475
476Please report them!  The preferred way to submit a bug report for this module
477is through CPAN's bug tracker:
478L<http://rt.cpan.org/Public/Dist/Display.html?Name=WWW-TinySong>.  You may
479also create an issue at L<http://elementsofpuzzle.googlecode.com/> or drop
480me an e-mail.
481
482=head1 AUTHOR
483
484Miorel-Lucian Palii, E<lt>mlpalii@gmail.comE<gt>
485
486=head1 VERSION
487
488Version 1.01  (June 26, 2009)
489
490The latest version is hosted on Google Code as part of
491L<http://elementsofpuzzle.googlecode.com/>.  Significant changes are also
492contributed to CPAN: L<http://search.cpan.org/dist/WWW-TinySong/>.
493
494=head1 COPYRIGHT AND LICENSE
495
496Copyright (C) 2009 by Miorel-Lucian Palii
497
498This library is free software; you can redistribute it and/or modify
499it under the same terms as Perl itself, either Perl version 5.8.8 or,
500at your option, any later version of Perl 5 you may have available.
501
502=cut
503