1# $Id: AnyDBM_File.pm,v 1.1.1.1 2003/08/02 23:40:21 takezoe Exp $
2
3package WWW::RobotRules::AnyDBM_File;
4
5require  WWW::RobotRules;
6@ISA = qw(WWW::RobotRules);
7$VERSION = sprintf("%d.%02d", q$Revision: 1.1.1.1 $ =~ /(\d+)\.(\d+)/);
8
9use Carp ();
10use AnyDBM_File;
11use Fcntl;
12use strict;
13
14=head1 NAME
15
16WWW::RobotRules::AnyDBM_File - Persistent RobotRules
17
18=head1 SYNOPSIS
19
20 require WWW::RobotRules::AnyDBM_File;
21 require LWP::RobotUA;
22
23 # Create a robot useragent that uses a diskcaching RobotRules
24 my $rules = new WWW::RobotRules::AnyDBM_File 'my-robot/1.0', 'cachefile';
25 my $ua = new WWW::RobotUA 'my-robot/1.0', 'me@foo.com', $rules;
26
27 # Then just use $ua as usual
28 $res = $ua->request($req);
29
30=head1 DESCRIPTION
31
32This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
33package to implement persistent diskcaching of F<robots.txt> and host
34visit information.
35
36The constructor (the new() method) takes an extra argument specifying
37the name of the DBM file to use.  If the DBM file already exists, then
38you can specify undef as agent name as the name can be obtained from
39the DBM database.
40
41=cut
42
43sub new
44{
45  my ($class, $ua, $file) = @_;
46  Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file;
47
48  my $self = bless { }, $class;
49  $self->{'filename'} = $file;
50  tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640
51    or Carp::croak("Can't open $file: $!");
52
53  if ($ua) {
54      $self->agent($ua);
55  } else {
56      # Try to obtain name from DBM file
57      $ua = $self->{'dbm'}{"|ua-name|"};
58      Carp::croak("No agent name specified") unless $ua;
59  }
60
61  $self;
62}
63
64sub agent {
65    my($self, $newname) = @_;
66    my $old = $self->{'dbm'}{"|ua-name|"};
67    if (defined $newname) {
68	$newname =~ s!/?\s*\d+.\d+\s*$!!;  # loose version
69	unless ($old && $old eq $newname) {
70	# Old info is now stale.
71	    my $file = $self->{'filename'};
72	    untie %{$self->{'dbm'}};
73	    tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640;
74	    %{$self->{'dbm'}} = ();
75	    $self->{'dbm'}{"|ua-name|"} = $newname;
76	}
77    }
78    $old;
79}
80
81sub no_visits {
82    my ($self, $netloc) = @_;
83    my $t = $self->{'dbm'}{"$netloc|vis"};
84    return 0 unless $t;
85    (split(/;\s*/, $t))[0];
86}
87
88sub last_visit {
89    my ($self, $netloc) = @_;
90    my $t = $self->{'dbm'}{"$netloc|vis"};
91    return undef unless $t;
92    (split(/;\s*/, $t))[1];
93}
94
95sub fresh_until {
96    my ($self, $netloc, $fresh) = @_;
97    my $old = $self->{'dbm'}{"$netloc|exp"};
98    if ($old) {
99	$old =~ s/;.*//;  # remove cleartext
100    }
101    if (defined $fresh) {
102	$fresh .= "; " . localtime($fresh);
103	$self->{'dbm'}{"$netloc|exp"} = $fresh;
104    }
105    $old;
106}
107
108sub visit {
109    my($self, $netloc, $time) = @_;
110    $time ||= time;
111
112    my $count = 0;
113    my $old = $self->{'dbm'}{"$netloc|vis"};
114    if ($old) {
115	my $last;
116	($count,$last) = split(/;\s*/, $old);
117	$time = $last if $last > $time;
118    }
119    $count++;
120    $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time);
121}
122
123sub push_rules {
124    my($self, $netloc, @rules) = @_;
125    my $cnt = 1;
126    $cnt++ while $self->{'dbm'}{"$netloc|r$cnt"};
127
128    foreach (@rules) {
129	$self->{'dbm'}{"$netloc|r$cnt"} = $_;
130	$cnt++;
131    }
132}
133
134sub clear_rules {
135    my($self, $netloc) = @_;
136    my $cnt = 1;
137    while ($self->{'dbm'}{"$netloc|r$cnt"}) {
138	delete $self->{'dbm'}{"$netloc|r$cnt"};
139	$cnt++;
140    }
141}
142
143sub rules {
144    my($self, $netloc) = @_;
145    my @rules = ();
146    my $cnt = 1;
147    while (1) {
148	my $rule = $self->{'dbm'}{"$netloc|r$cnt"};
149	last unless $rule;
150	push(@rules, $rule);
151	$cnt++;
152    }
153    @rules;
154}
155
156sub dump
157{
158}
159
1601;
161
162=head1 SEE ALSO
163
164L<WWW::RobotRules>, L<LWP::RobotUA>
165
166=head1 AUTHORS
167
168Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no>
169
170=cut
171
172