1#------------------------------------------------------------------------------
2# Copyright (C) 2010, Shaun Amott <shaun@inerd.com>
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions
7# are met:
8# 1. Redistributions of source code must retain the above copyright
9#    notice, this list of conditions and the following disclaimer.
10# 2. Redistributions in binary form must reproduce the above copyright
11#    notice, this list of conditions and the following disclaimer in the
12#    documentation and/or other materials provided with the distribution.
13#
14# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24# SUCH DAMAGE.
25#
26# $Id: SourceForge.pm,v 1.8 2010/05/05 01:54:16 samott Exp $
27#------------------------------------------------------------------------------
28
29package Portscout::SiteHandler::SourceForge;
30
31use XML::XPath;
32use XML::XPath::XMLParser;
33use LWP::UserAgent;
34
35use Portscout::Const;
36use Portscout::Config;
37
38use strict;
39
40require 5.006;
41
42
43#------------------------------------------------------------------------------
44# Globals
45#------------------------------------------------------------------------------
46
47push @Portscout::SiteHandler::sitehandlers, __PACKAGE__;
48
49our %settings;
50
51
52#------------------------------------------------------------------------------
53# Func: new()
54# Desc: Constructor.
55#
56# Args: n/a
57#
58# Retn: $self
59#------------------------------------------------------------------------------
60
61sub new
62{
63	my $self      = {};
64	my $class     = shift;
65
66	$self->{name} = 'SourceForge';
67
68	bless ($self, $class);
69	return $self;
70}
71
72
73#------------------------------------------------------------------------------
74# Func: CanHandle()
75# Desc: Ask if this handler (package) can handle the given site.
76#
77# Args: $url - URL of site.
78#
79# Retn: $res - true/false.
80#------------------------------------------------------------------------------
81
82sub CanHandle
83{
84	my $self = shift;
85
86	my ($url) = @_;
87
88	return ($url =~ /^https?:\/\/[^\/]*?\.sourceforge\.net\/project\//);
89}
90
91
92#------------------------------------------------------------------------------
93# Func: GetFiles()
94# Desc: Extract a list of files from the given URL. In the case of SourceForge,
95#       we are actually pulling the files from an RSS feed helpfully provided
96#       for each "project".
97#
98# Args: $url     - URL we would normally fetch from.
99#       \%port   - Port hash fetched from database.
100#       \@files  - Array to put files into.
101#
102# Retn: $success - False if file list could not be constructed; else, true.
103#------------------------------------------------------------------------------
104
105sub GetFiles
106{
107	my $self = shift;
108
109	my ($url, $port, $files) = @_;
110
111	if ($url =~ /[^\/]*\/project\/([^\/]*)\//) {
112		my ($rsspage, $projname, $ua, $response, $xpath, $items);
113
114		$projname = $1;
115
116		# Find the RSS feed for this project.
117		$rsspage = 'http://sourceforge.net/api/file/index/project-name/'
118			. $projname . '/mtime/desc/rss';
119
120		_debug("Trying RSS @ $rsspage");
121
122		$ua = LWP::UserAgent->new;
123
124		$ua->agent(USER_AGENT);
125		$ua->timeout($settings{http_timeout});
126
127		$response = $ua->get($rsspage);
128
129		if (!$response->is_success || $response->status_line !~ /^2/) {
130			_debug('RSS feed failed: ' . $response->status_line);
131			return 0;
132		}
133
134		$xpath = XML::XPath->new(xml => $response->content);
135
136		$items = $xpath->findnodes('/rss/channel/item');
137
138		foreach my $item ($items->get_nodelist) {
139			my ($data, $tnode, $file, $lnode, $url);
140
141			$data = $xpath->findnodes('./title', $item);
142			$tnode = ($data->get_nodelist)[0];
143			$file = "/project/$projname" . $tnode->string_value();
144
145			# There doesn't seem to be a canonical way of
146			# determining which entries are directories;
147			# but directories seem to (rightly) have
148			# trailing slashes in the full URL, in <link />.
149
150			$data = $xpath->findnodes('./link', $item);
151			$lnode = ($data->get_nodelist)[0];
152			$url = $lnode->string_value();
153
154			next if ($url =~ /\/$/);
155
156			# Note this file.
157
158			push @$files, $file;
159		}
160
161		_debug('Found ' . scalar @$files . ' files');
162	} else {
163		return 0;
164	}
165
166	return 1;
167}
168
169
170#------------------------------------------------------------------------------
171# Func: _debug()
172# Desc: Print a debug message.
173#
174# Args: $msg - Message.
175#
176# Retn: n/a
177#------------------------------------------------------------------------------
178
179sub _debug
180{
181	my ($msg) = @_;
182
183	$msg = '' if (!$msg);
184
185	print STDERR "(SiteHandler::SourceForge) $msg\n"
186		if ($settings{debug});
187}
188
189
1901;
191