1# $Id: AnyDBM_File.pm,v 1.1.1.1 2003/08/02 23:40:21 takezoe Exp $ 2 3package WWW::RobotRules::AnyDBM_File; 4 5require WWW::RobotRules; 6@ISA = qw(WWW::RobotRules); 7$VERSION = sprintf("%d.%02d", q$Revision: 1.1.1.1 $ =~ /(\d+)\.(\d+)/); 8 9use Carp (); 10use AnyDBM_File; 11use Fcntl; 12use strict; 13 14=head1 NAME 15 16WWW::RobotRules::AnyDBM_File - Persistent RobotRules 17 18=head1 SYNOPSIS 19 20 require WWW::RobotRules::AnyDBM_File; 21 require LWP::RobotUA; 22 23 # Create a robot useragent that uses a diskcaching RobotRules 24 my $rules = new WWW::RobotRules::AnyDBM_File 'my-robot/1.0', 'cachefile'; 25 my $ua = new WWW::RobotUA 'my-robot/1.0', 'me@foo.com', $rules; 26 27 # Then just use $ua as usual 28 $res = $ua->request($req); 29 30=head1 DESCRIPTION 31 32This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File 33package to implement persistent diskcaching of F<robots.txt> and host 34visit information. 35 36The constructor (the new() method) takes an extra argument specifying 37the name of the DBM file to use. If the DBM file already exists, then 38you can specify undef as agent name as the name can be obtained from 39the DBM database. 40 41=cut 42 43sub new 44{ 45 my ($class, $ua, $file) = @_; 46 Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file; 47 48 my $self = bless { }, $class; 49 $self->{'filename'} = $file; 50 tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640 51 or Carp::croak("Can't open $file: $!"); 52 53 if ($ua) { 54 $self->agent($ua); 55 } else { 56 # Try to obtain name from DBM file 57 $ua = $self->{'dbm'}{"|ua-name|"}; 58 Carp::croak("No agent name specified") unless $ua; 59 } 60 61 $self; 62} 63 64sub agent { 65 my($self, $newname) = @_; 66 my $old = $self->{'dbm'}{"|ua-name|"}; 67 if (defined $newname) { 68 $newname =~ s!/?\s*\d+.\d+\s*$!!; # loose version 69 unless ($old && $old eq $newname) { 70 # Old info is now stale. 71 my $file = $self->{'filename'}; 72 untie %{$self->{'dbm'}}; 73 tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640; 74 %{$self->{'dbm'}} = (); 75 $self->{'dbm'}{"|ua-name|"} = $newname; 76 } 77 } 78 $old; 79} 80 81sub no_visits { 82 my ($self, $netloc) = @_; 83 my $t = $self->{'dbm'}{"$netloc|vis"}; 84 return 0 unless $t; 85 (split(/;\s*/, $t))[0]; 86} 87 88sub last_visit { 89 my ($self, $netloc) = @_; 90 my $t = $self->{'dbm'}{"$netloc|vis"}; 91 return undef unless $t; 92 (split(/;\s*/, $t))[1]; 93} 94 95sub fresh_until { 96 my ($self, $netloc, $fresh) = @_; 97 my $old = $self->{'dbm'}{"$netloc|exp"}; 98 if ($old) { 99 $old =~ s/;.*//; # remove cleartext 100 } 101 if (defined $fresh) { 102 $fresh .= "; " . localtime($fresh); 103 $self->{'dbm'}{"$netloc|exp"} = $fresh; 104 } 105 $old; 106} 107 108sub visit { 109 my($self, $netloc, $time) = @_; 110 $time ||= time; 111 112 my $count = 0; 113 my $old = $self->{'dbm'}{"$netloc|vis"}; 114 if ($old) { 115 my $last; 116 ($count,$last) = split(/;\s*/, $old); 117 $time = $last if $last > $time; 118 } 119 $count++; 120 $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time); 121} 122 123sub push_rules { 124 my($self, $netloc, @rules) = @_; 125 my $cnt = 1; 126 $cnt++ while $self->{'dbm'}{"$netloc|r$cnt"}; 127 128 foreach (@rules) { 129 $self->{'dbm'}{"$netloc|r$cnt"} = $_; 130 $cnt++; 131 } 132} 133 134sub clear_rules { 135 my($self, $netloc) = @_; 136 my $cnt = 1; 137 while ($self->{'dbm'}{"$netloc|r$cnt"}) { 138 delete $self->{'dbm'}{"$netloc|r$cnt"}; 139 $cnt++; 140 } 141} 142 143sub rules { 144 my($self, $netloc) = @_; 145 my @rules = (); 146 my $cnt = 1; 147 while (1) { 148 my $rule = $self->{'dbm'}{"$netloc|r$cnt"}; 149 last unless $rule; 150 push(@rules, $rule); 151 $cnt++; 152 } 153 @rules; 154} 155 156sub dump 157{ 158} 159 1601; 161 162=head1 SEE ALSO 163 164L<WWW::RobotRules>, L<LWP::RobotUA> 165 166=head1 AUTHORS 167 168Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no> 169 170=cut 171 172