1#!/usr/bin/env perl 2use 5.012; 3use warnings; 4 5use CPAN::DistnameInfo; 6use Cwd qw(realpath); 7use DB_File; 8use File::Find; 9use File::Spec::Functions qw(catfile catpath splitpath tmpdir); 10use PerlIO::gzip; 11use WebService::Google::Reader; 12 13use constant VERBOSE => not $ENV{CRON}; 14 15my $dir = catpath((splitpath(realpath __FILE__))[0, 1]); 16my $file = catfile($dir, '.unwanted-modules.bdb'); 17my $db = tie my %db, DB_File => $file; 18 19my $reader = WebService::Google::Reader->new( 20 host => 'www.inoreader.com', 21 username => $ENV{GOOGLE_USERNAME}, 22 password => $ENV{GOOGLE_PASSWORD}, 23); 24 25# Return a sub that checks if the given Perl dist is installed. 26my $is_perl_dist_installed = do { 27 # Find the list of installed modules. 28 my (%modules, %prune); 29 for my $top (reverse sort @INC) { 30 next if '.' eq $top; 31 my $len = length $top; 32 find { 33 wanted => sub { 34 if ($File::Find::dir ~~ %prune) { 35 $File::Find::prune = 1; 36 return; 37 } 38 return unless '.pm' eq substr $_, -3, 3, ''; 39 return unless -f _; 40 substr $_, 0, 1 + $len, ''; 41 s{[\\/]}{::}g; 42 $modules{$_} = undef; 43 }, 44 no_chdir => 1 45 }, $top; 46 $prune{$top} = undef; 47 } 48 49 # Fetch the file that maps packages to distributions. 50 my $file = '02packages.details.txt.gz'; 51 my $url = "http://search.cpan.org/CPAN/modules/$file"; 52 $file = catfile(tmpdir, $file); 53 if (not -r $file or 1 < -M _) { 54 my $res = $reader->ua->mirror($url, $file); 55 die "Failed to mirror $file; " if $res->is_error; 56 } 57 open my $fh, '<:gzip', $file or die "$file: $!"; 58 59 # Skip header. 60 while (<$fh>) { last when "\n" } 61 62 # Determine the installed distributions, given the installed modules. 63 my %dists; 64 while (my $line = <$fh>) { 65 my ($package, $version, $dist) = split /\s+/, $line; 66 next unless $package ~~ %modules; 67 $dists{ CPAN::DistnameInfo->new($dist)->dist } = undef; 68 } 69 close $fh; 70 71 sub { $_[0] ~~ %dists; } 72}; 73 74my %conf = ( 75 perl => { 76 url => 'http://search.cpan.org/uploads.rdf', 77 name => sub { 78 CPAN::DistnameInfo->new($_[0]->title . '.tgz')->dist 79 }, 80 whitelist => [ $is_perl_dist_installed ], 81 }, 82 python => { 83 url => 'http://pypi.python.org/pypi?:action=rss', 84 name => sub { 85 my ($name) = $_[0]->link->href =~ m[ 86 ^http://pypi\.python\.org/pypi/([^/]+) 87 ]x; 88 return $name; 89 }, 90 blacklist => [ qr/ (?:\b|_) (?:django | plone | zope) (?:\b|_) /ix ], 91 }, 92 ruby => { 93 url => 'http://feeds.feedburner.com/gemcutter-latest', 94 name => sub { 95 my ($name) = $_[0]->link->href =~ m[ 96 ^http://rubygems\.org/gems/([^?/]+) 97 ]x; 98 return $name; 99 }, 100 blacklist => [ 101 qr/ (?:\b|_) (?:rails | active\W?record) (?:\b|_) /ix, 102 ], 103 }, 104 haskell => { 105 url => 'http://hackage.haskell.org/packages/archive/recent.rss', 106 name => sub { ($_[0]->title =~ m[^\s*(\S+)])[0] }, 107 }, 108 vimscripts => { 109 url => 'http://feed43.com/vim-scripts.xml', 110 name => sub { 111 my ($title, $name) = $_[0]->title =~ m[^\s*((.*?)\s+\S+) --]; 112 $_[0]->title($title); 113 return $name; 114 }, 115 }, 116); 117 118# Get list of feed subscription times. 119my %subs; 120for my $sub ($reader->feeds) { 121 (my $id = $sub->id) =~ s[^feed/][] or next; 122 $subs{$id} = int $sub->firstitemmsec / 1000; 123} 124 125while (my ($lang, $conf) = each %conf) { 126 my $feed = $reader->feed( 127 $conf->{url}, 128 count => 100, 129 exclude => { state => 'read' }, 130 start_time => $subs{$conf->{url}}, 131 ); 132 die $reader->error unless $feed; 133 134 my @blacklist = @{ $conf->{blacklist} || [] }; 135 my @whitelist = @{ $conf->{whitelist} || [] }; 136 137 my @unwanted_entries; 138 { 139 for my $entry ($feed->entries) { 140 my $name = $conf->{name}->($entry); 141 my $title = $entry->title; 142 my $summary = $entry->summary || ''; 143 my $desc = $entry->content || ''; 144 $desc &&= $desc->body; 145 # say "$lang | $name | $title | $summary | $desc\n" and next; 146 147 unless ($name) { 148 warn "Couldn't extract name from $title\n"; 149 next; 150 } 151 152 my $listed; 153 for my $w (@whitelist) { 154 next if not $name ~~ $w; 155 $listed = 1; 156 VERBOSE && say "$lang - $name - whitelisted"; 157 last; 158 } 159 next if $listed; 160 161 for my $b (@blacklist) { 162 next if not [$name, $title, $summary, $desc] ~~ $b; 163 $listed = 1; 164 push @unwanted_entries, $entry; 165 VERBOSE && say "$lang - $name - blacklisted"; 166 last; 167 } 168 next if $listed; 169 170 if ("$lang|$name" ~~ %db and $title ne $db{"$lang|$name"}) { 171 push @unwanted_entries, $entry; 172 VERBOSE && say "$lang - $name - unwanted because seen"; 173 } 174 else { 175 $db{"$lang|$name"} = $title; 176 } 177 } 178 179 sleep 0.25; 180 redo if $reader->more($feed); 181 } 182 183 $reader->mark_read_entry(\@unwanted_entries); 184} 185