1package Digest::ManberHash;
2
3=head1 NAME
4
5Digest::ManberHash - a Perl package to calculate Manber Hashes
6
7=head1 SYNOPSIS
8
9  use Digest::ManberHash;
10
  $instance = Digest::ManberHash->new($maskbits, $prime, $charcount);
12
13  $hash1 = $instance->DoHash($filename1);
14  $hash2 = $instance->DoHash($filename2);
15
16  $similarity = $instance->Compare($hash1, $hash2);
17
18=head1 DESCRIPTION
19
20=head2 Initialization
21
Use C<< Digest::ManberHash->new >>.
23Parameters:
24
25=over 4
26
27=item maskbits
28
29range 1 .. 30, default 11.
30
31=item prime
32
33range 3 .. 65537, default 7.
34
35=item charcount
36
37range 8 .. 32768, default 64.
38
39=back
40
For a detailed description please read Udi Manber's paper
"Finding Similar Files in a Large File System" (1994),
http://citeseer.nj.nec.com/manber94finding.html.
42
43
44=head2 Calculating hashes
45
46  $hash = $instance->DoHash($filename);
47
This returns an object that holds a hash of the hash values calculated for the file.
49
50
51=head2 Comparing hashes
52
53  $similarity = $instance->Compare($hash1, $hash2);
54
This returns a value between 0.0 and 1.0 that reflects how similar the two files are.
Help wanted: the similarity calculation could be improved.
57
58
59=cut
60
61require Exporter;
62require DynaLoader;
63
# Parent classes: Exporter provides import(), DynaLoader provides the
# bootstrap() call made near the end of this file.
our @ISA = ('Exporter', 'DynaLoader');

# Names copied into the caller's namespace by a plain "use".  Exporting by
# default is normally discouraged (EXPORT_OK is the polite alternative),
# but this list is part of the module's existing interface.
# NOTE(review): HashFile is not defined in the Perl code of this file --
# presumably it comes from the compiled part or is a leftover; confirm.
our @EXPORT = ('HashFile', 'new', 'Compare');

our $VERSION = '0.7';
74
75
# Create a hashing context.
#
# Accepted call forms:
#   Digest::ManberHash->new($maskbits, $prime, $charcount)    (class method)
#   $instance->new($maskbits, $prime, $charcount)             (object method)
#   Digest::ManberHash::new($maskbits, $prime, $charcount)    (plain function)
#
# Parameters (a missing or false value selects the default):
#   $maskbits  - default 11
#   $prime     - default 7
#   $charcount - default 64
#
# Returns a blessed hash whose "settings" entry holds the value returned
# by Init() for these parameters.  The object is blessed into the invoking
# class, or into this package for the plain function call.
sub new
{
  my @args = @_;
  my $class = __PACKAGE__;

  # A class name or an object in front means a method call; a number (or
  # nothing at all) means the plain function call, where the first argument
  # already is $maskbits.  Package names cannot start with a digit, so the
  # first character is enough to tell the two apart.
  if (@args && defined $args[0] && (ref $args[0] || $args[0] =~ /^[A-Za-z_]/))
  {
    my $invocant = shift @args;
    $class = ref($invocant) || $invocant;
  }

  my($maskbits, $prime, $charcount) = @args;

  $prime     ||= 7;
  $maskbits  ||= 11;
  $charcount ||= 64;

  # Init() is passed the prime first, i.e. not in new()'s parameter order.
  my $settings = Init($prime, $maskbits, $charcount);

  return bless { "settings" => $settings }, $class;
}
90
# Hash one file and wrap the result in an object.
#
# Parameters:
#   $self     - instance created by new(); its "settings" entry is handed
#               to ManberHash()
#   $filename - passed through to ManberHash() unchanged
#
# Returns a blessed hash with two entries: "data", the hash filled in by
# ManberHash(), and "base", the instance that produced it (Compare()
# checks that entry).
#
# Side effect: $self->{"max"} is updated per key with the largest value
# seen by this instance so far.
sub DoHash
{
  my($self,$filename)=@_;
  my %values;

  ManberHash($self->{"settings"}, $filename, \%values);

  for my $key (keys %values)
  {
    my $value = $values{$key};

    # A key without a recorded maximum compares as 0 here, so only values
    # above 0 get stored for it.
    if ($self->{"max"}{$key} < $value)
    {
      $self->{"max"}{$key} = $value;
    }
  }

  return bless { "data" => \%values, "base" => $self }, __PACKAGE__;
}
107
# Compare two hash objects and return a similarity score.
#
# Parameters:
#   $self  - the instance whose DoHash() produced both objects
#   $file1 - first hash object, as returned by DoHash()
#   $file2 - second hash object, as returned by DoHash()
#
# Returns 0 when neither object holds any value.  Otherwise returns
# 1 / (1 + D), where D is the sum of squared differences between the two
# objects' values, taken over the union of their keys; a key missing from
# one object counts as 0 there.  Identical data therefore gives 1.
#
# Croaks when either object's "base" entry is not this instance, since
# objects made by different instances are not compared.
sub Compare
{
  my($self,$file1,$file2)=@_;

  for my $hash ($file1, $file2)
  {
    my $base = ref($hash) ? $hash->{"base"} : undef;
    next if defined($base) && defined($self) && $self eq $base;

    require Carp;
    Carp::croak("Digest::ManberHash::Compare: both hashes must come from "
      . "DoHash() of the same instance");
  }

  my $data1 = $file1->{"data"} || {};
  my $data2 = $file2->{"data"} || {};

  # Union of the keys of both objects.
  my %union = map { $_ => 1 } (keys %$data1, keys %$data2);
  return 0 if !scalar(keys %union);

  my $sum = 0;
  for my $key (keys %union)
  {
    my $v1 = defined $data1->{$key} ? $data1->{$key} : 0;
    my $v2 = defined $data2->{$key} ? $data2->{$key} : 0;
    my $diff = $v1 - $v2;
    $sum += $diff*$diff;
  }

  # Other formulas that were tried before and dropped:
  #   1 - 6*$sum/($count*$count*$count - $count)
  #   1 - sqrt($sum)/$count
  return 1/(1.0+$sum);
}
134
# Load the compiled part of the module through the bootstrap() method
# inherited via @ISA; $VERSION is passed along with the call.
Digest::ManberHash->bootstrap($VERSION);

# Subroutines that must be available right after loading belong above;
# anything placed after __END__ is left to the autosplit program.

# A module file has to end with a true value.
1;
142__END__
143#
144
145
146