#!/usr/bin/env perl
use strict;
use warnings;

use URI ();
use XML::LibXML ();
use Web::Scraper::LibXML qw( scraper process );

use lib 'lib';
use XML::Atom::SimpleFeed;

# Scrapes the news items off the Slackware front page and prints them to
# STDOUT as an Atom feed.

# Page that is scraped; also used as the feed's link.
use constant SOURCE_URI => 'http://slackware.com/';

# trim( $text )
#
# Returns a copy of $text with leading and trailing whitespace removed.
# An undefined argument yields the empty string, so a scraper field that
# matched nothing does not trigger "uninitialized value" warnings.
#
# A named lexical is used deliberately: `my $_` was removed in perl 5.24,
# and the global $_ must stay untouched because callers invoke trim()
# while $_ is aliased to the element they are processing.
sub trim {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s!\A\s+!!;
    $text =~ s!\s+\z!!;
    return $text;
}

my $parser = XML::LibXML->new;

# Each table matched by the outer selector becomes one element of `posts`,
# carrying the keys `body`, `title` and `date`.
my $posts = scraper {
    process 'center > table[width="100%"]', 'posts[]' => scraper {
        process 'table[cellpadding="14"] td[bgcolor="#fefefe"]', body => sub {
            my $markup = $_->as_XML;

            # NOTE(review): the substitution here read `s/ / /g`, i.e. space
            # for space, which does nothing. It was presumably a non-breaking
            # space (or `&nbsp;`) mangled in transit -- confirm against the
            # upstream copy. The optional \xC2 lets the pattern match the
            # NBSP both as a character and as its UTF-8 byte pair.
            $markup =~ s/(?:&nbsp;|\xC2?\xA0)/ /g;

            # Collapse every run of whitespace to a single space.
            $markup =~ s/\s+/ /g;

            # Re-parse the cell and serialise only its children, which
            # drops the enclosing <td> wrapper from the content.
            my $doc = $parser->parse_string( trim($markup) );
            return trim(
                join '',
                map { $_->toString } $doc->documentElement->childNodes
            );
        };
        process 'td > b', title => 'TEXT';
        process 'td > center > font[size="-1"] > b', date => 'TEXT';
    };
};

my $res = $posts->scrape( URI->new( SOURCE_URI ) );

my $f = XML::Atom::SimpleFeed->new(
    title  => 'Slackware.com',
    id     => 'urn:uuid:ce386280-61e7-11da-9fcb-dd680b0526e0',
    icon   => 'http://www.slackware.com/favicon.ico',
    link   => SOURCE_URI,
    author => 'The Slackware Team',
);

# `posts` is absent when the outer selector matched nothing.
for my $post ( @{ $res->{posts} || [] } ) {
    my $date = trim( $post->{date} );

    # The date feeds both the entry id and its timestamp; without one the
    # entry would get a non-unique id and an invalid `updated` value.
    if ( $date eq '' ) {
        warn "Skipping scraped block without a date\n";
        next;
    }

    $f->add_entry(
        title   => trim( $post->{title} ),
        content => trim( $post->{body} ),
        id      => 'tag:plasmasturm.org,2005:Slackware-News-' . $date,
        # The page supplies only a date, so a fixed noon UTC time is appended.
        updated => $date . 'T12:00:00Z',
    );
}

$f->print;