1#!/usr/bin/env perl
2use strict;
3use warnings;
4
5use URI ();
6use XML::LibXML ();
7use Web::Scraper::LibXML qw( scraper process );
8
9use lib 'lib';
10use XML::Atom::SimpleFeed;
11
# Address of the page that is scraped; also used as the feed's link.
use constant SOURCE_URI => 'http://slackware.com/';
13
# Return a copy of the given string with leading and trailing whitespace
# removed. The caller's value and the global $_ are left untouched.
#
# The ($) prototype is deliberate: it makes `trim` parse as a named unary
# operator, so call sites can write `key => trim $value, other => ...`
# without the rest of the list being swallowed as arguments.
#
# The argument is copied into a named lexical rather than `my $_`: the
# lexical topic variable was experimental and was removed in Perl 5.24,
# where `my $_` is a compile-time error.
sub trim($) {
	my ( $str ) = @_;
	$str =~ s!\A\s+!!;
	$str =~ s!\s+\z!!;
	return $str;
}
15
# Parser used to re-parse each post body, so that the children of its root
# element can be serialised without the root element itself.
my $p = XML::LibXML->new;

# Scraper for the news listing. Each table matched by the outer selector
# yields one item under `posts`, with `body`, `title` and `date` keys.
my $posts = scraper {
	process 'center > table[width="100%"]', 'posts[]' => scraper {
		process 'table[cellpadding="14"] td[bgcolor="#fefefe"]', body => sub {
			my $markup = $_->as_XML;

			# Turn line breaks into spaces. The pattern is spelled with
			# explicit escapes instead of a raw control character embedded in
			# the source: \R matches any line terminator, and &#13; covers
			# carriage returns the serialiser may have emitted as character
			# references (which \s below would not match).
			$markup =~ s/(?:&#13;|\R)/ /g;

			# Collapse every run of whitespace to a single space.
			$markup =~ s/\s+/ /g;

			# Re-parse the cleaned markup and keep only the children of the
			# root element, concatenated as a string.
			my $doc = $p->parse_string( trim( $markup ) );
			return trim( join '', map { $_->toString } $doc->documentElement->childNodes );
		};
		process 'td > b', title => 'TEXT';
		process 'td > center > font[size="-1"] > b', date => 'TEXT';
	};
};
31
# Fetch the source page and run the scraper over it; the scraped items are
# read from the `posts` key of the result further down.
my $res = $posts->scrape( URI->new( SOURCE_URI ) );

# Feed-level metadata.
my $f = XML::Atom::SimpleFeed->new(
	title  => 'Slackware.com',
	id     => 'urn:uuid:ce386280-61e7-11da-9fcb-dd680b0526e0',
	icon   => 'http://www.slackware.com/favicon.ico',
	link   => SOURCE_URI,
	author => 'The Slackware Team',
);

# Add one entry per scraped post. The trimmed date text is used both as the
# suffix of the entry id and, with a fixed noon-UTC time appended, as the
# entry's `updated` timestamp.
for my $post ( @{ $res->{posts} } ) {
	$f->add_entry(
		title   => trim( $post->{title} ),
		content => trim( $post->{body} ),
		id      => 'tag:plasmasturm.org,2005:Slackware-News-' . trim( $post->{date} ),
		updated => trim( $post->{date} ) . 'T12:00:00Z',
	);
}

# Output the assembled feed through the feed object's own print method.
$f->print;
50