package Digest::ManberHash;

=head1 NAME

Digest::ManberHash - a Perl package to calculate Manber Hashes

=head1 SYNOPSIS

  use Digest::ManberHash;

  $instance = Digest::ManberHash->new($maskbits, $prime, $charcount);

  $hash1 = $instance->DoHash($filename1);
  $hash2 = $instance->DoHash($filename2);

  $similarity = $instance->Compare($hash1, $hash2);

=head1 DESCRIPTION

=head2 Initialization

Use C<< Digest::ManberHash->new >>.
Parameters:

=over 4

=item maskbits

range 1 .. 30, default 11.

=item prime

range 3 .. 65537, default 7.

=item charcount

range 8 .. 32768, default 64.

=back

For a detailed description please read http://citeseer.nj.nec.com/manber94finding.html.


=head2 Calculating hashes

  $hash = $instance->DoHash($filename);

This gives an object which holds a hash of hash values.


=head2 Comparing hashes

  $similarity = $instance->Compare($hash1, $hash2);

This gives a value in the range 0.0 .. 1.0, depending on the similarity.
Help wanted: The calculation could do better than now!!


=cut

require Exporter;
require DynaLoader;

our @ISA = qw(Exporter DynaLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
use strict;
use warnings;

# Default export list, kept exactly as it was for backward compatibility.
# NOTE(review): HashFile is not defined in the Perl code of this file;
# presumably it is (or once was) provided by the compiled part of the
# module - TODO confirm.
our @EXPORT = qw(
    HashFile
    new
    Compare
);
our $VERSION = '0.7';


# new - create a hasher instance.
#
# Parameters (each one falls back to its default when omitted or false):
#   $maskbits   default 11
#   $prime      default 7
#   $charcount  default 64
#
# Accepted call forms:
#   Digest::ManberHash->new($maskbits, $prime, $charcount)
#   $instance->new($maskbits, $prime, $charcount)
#   Digest::ManberHash::new($maskbits, $prime, $charcount)
#
# The last (plain function) form is recognised by a first argument that is
# a plain non-negative integer; previously that number was taken for the
# class name and every parameter was shifted by one position.
#
# Returns a blessed hash whose "settings" entry holds the value returned
# by Init().  Init() is not defined in this file; it is expected to be
# supplied by the code loaded through the bootstrap call below.
sub new
{
    my $class = __PACKAGE__;

    my $called_as_function =
        @_ && defined $_[0] && !ref $_[0] && $_[0] =~ /\A\d+\z/;

    if (!$called_as_function) {
        # First argument occupies the invocant slot: a class name, an
        # object, or a placeholder.  Only honour it as the class when it
        # really is this class or a subclass of it.
        my $invocant = shift;
        if (defined $invocant && UNIVERSAL::isa($invocant, __PACKAGE__)) {
            $class = ref($invocant) || $invocant;
        }
    }

    my ($maskbits, $prime, $charcount) = @_;

    $maskbits  ||= 11;
    $prime     ||= 7;
    $charcount ||= 64;

    # Scalar assignment on purpose: Init() is called in scalar context,
    # exactly as before.  Note the argument order differs from new()'s.
    my $settings = Init($prime, $maskbits, $charcount);

    my %instance = ( "settings" => $settings );
    return bless \%instance, $class;
}

# DoHash - calculate the hash values for one file.
#
# Parameters:
#   $filename  passed through unchanged to ManberHash()
#
# ManberHash() is not defined in this file (expected to come from the
# bootstrapped code); it is handed the instance settings, the file name
# and a reference to an empty hash to fill in.
#
# Side effect: for every key of the filled hash, $self->{"max"}{key} is
# raised to the value seen here if that value is larger.
#
# Returns a blessed hash with two entries:
#   "data"  reference to the hash filled in by ManberHash()
#   "base"  the instance that produced it (checked by Compare)
sub DoHash
{
    my ($self, $filename) = @_;

    my %values;
    ManberHash($self->{"settings"}, $filename, \%values);

    while (my ($key, $value) = each %values) {
        # A key never seen before counts as 0, so only positive values
        # create a new "max" entry - same outcome as the old comparison
        # against undef, but without the numeric-undef warning.
        my $known_max = $self->{"max"}{$key} || 0;
        $self->{"max"}{$key} = $value if $known_max < $value;
    }

    my %result = ( "data" => \%values, "base" => $self );
    return bless \%result, __PACKAGE__;
}

# Compare - similarity of two hash objects returned by DoHash.
#
# Parameters:
#   $file1, $file2  objects returned by DoHash() of this very instance
#
# Dies when either object was not produced by $self.
#
# Returns 0 when neither object holds any key; otherwise 1/(1 + S), where
# S is the sum over all keys of the squared difference of the two values
# (a key missing on one side counts as 0).  Equal values on every key
# therefore give exactly 1.
sub Compare
{
    my ($self, $file1, $file2) = @_;

    for my $hash ($file1, $file2) {
        my $base = ref($hash) ? $hash->{"base"} : undef;
        die "Digest::ManberHash::Compare: hash object was not created by this instance"
            unless defined $base && $self eq $base;
    }

    my $data1 = $file1->{"data"} || {};
    my $data2 = $file2->{"data"} || {};

    # Union of the keys of both sides.
    my %all_keys = map { $_ => 1 } (keys %$data1, keys %$data2);
    return 0 unless keys %all_keys;

    my $squared_sum = 0;
    for my $key (keys %all_keys) {
        my $diff = ($data1->{$key} || 0) - ($data2->{$key} || 0);
        $squared_sum += $diff * $diff;
    }

    return 1 / (1.0 + $squared_sum);
}

# Load the compiled part of the module (method-call form of
# "bootstrap Digest::ManberHash $VERSION").
__PACKAGE__->bootstrap($VERSION);

# Preloaded methods go here.

# Autoload methods go after __END__, and are processed by the autosplit program.

1;
__END__
#