1package Search::Dict; 2require 5.000; 3require Exporter; 4 5my $fc_available; 6BEGIN { 7 $fc_available = '5.015008'; 8 if ( $] ge $fc_available ) { 9 require feature; 10 'feature'->import('fc'); # string avoids warning on old Perls <sigh> 11 } 12} 13 14use strict; 15 16our $VERSION = '1.07'; 17our @ISA = qw(Exporter); 18our @EXPORT = qw(look); 19 20=head1 NAME 21 22Search::Dict - look - search for key in dictionary file 23 24=head1 SYNOPSIS 25 26 use Search::Dict; 27 look *FILEHANDLE, $key, $dict, $fold; 28 29 use Search::Dict; 30 look *FILEHANDLE, $params; 31 32=head1 DESCRIPTION 33 34Sets file position in FILEHANDLE to be first line greater than or equal 35(stringwise) to I<$key>. Returns the new file position, or -1 if an error 36occurs. 37 38The flags specify dictionary order and case folding: 39 40If I<$dict> is true, search by dictionary order (ignore anything but word 41characters and whitespace). The default is honour all characters. 42 43If I<$fold> is true, ignore case. The default is to honour case. 44 45If there are only three arguments and the third argument is a hash 46reference, the keys of that hash can have values C<dict>, C<fold>, and 47C<comp> or C<xfrm> (see below), and their corresponding values will be 48used as the parameters. 49 50If a comparison subroutine (comp) is defined, it must return less than zero, 51zero, or greater than zero, if the first comparand is less than, 52equal, or greater than the second comparand. 53 54If a transformation subroutine (xfrm) is defined, its value is used to 55transform the lines read from the filehandle before their comparison. 56 57=cut 58 59sub look { 60 my($fh,$key,$dict,$fold) = @_; 61 my ($comp, $xfrm); 62 if (@_ == 3 && ref $dict eq 'HASH') { 63 my $params = $dict; 64 $dict = 0; 65 $dict = $params->{dict} if exists $params->{dict}; 66 $fold = $params->{fold} if exists $params->{fold}; 67 $comp = $params->{comp} if exists $params->{comp}; 68 $xfrm = $params->{xfrm} if exists $params->{xfrm}; 69 } 70 $comp = sub { $_[0] cmp $_[1] } unless defined $comp; 71 local($_); 72 my $fno = fileno $fh; 73 my @stat; 74 if ( defined $fno && $fno >= 0 && ! tied *{$fh} ) { # real, open file 75 @stat = eval { stat($fh) }; # in case fileno lies 76 } 77 my($size, $blksize) = @stat[7,11]; 78 $size = do { seek($fh,0,2); my $s = tell($fh); seek($fh,0,0); $s } 79 unless defined $size; 80 $blksize ||= 8192; 81 $key =~ s/[^\w\s]//g if $dict; 82 if ( $fold ) { 83 $key = $] ge $fc_available ? fc($key) : lc($key); 84 } 85 # find the right block 86 my($min, $max) = (0, int($size / $blksize)); 87 my $mid; 88 while ($max - $min > 1) { 89 $mid = int(($max + $min) / 2); 90 seek($fh, $mid * $blksize, 0) 91 or return -1; 92 <$fh> if $mid; # probably a partial line 93 $_ = <$fh>; 94 $_ = $xfrm->($_) if defined $xfrm; 95 chomp; 96 s/[^\w\s]//g if $dict; 97 if ( $fold ) { 98 $_ = $] ge $fc_available ? fc($_) : lc($_); 99 } 100 if (defined($_) && $comp->($_, $key) < 0) { 101 $min = $mid; 102 } 103 else { 104 $max = $mid; 105 } 106 } 107 # find the right line 108 $min *= $blksize; 109 seek($fh,$min,0) 110 or return -1; 111 <$fh> if $min; 112 for (;;) { 113 $min = tell($fh); 114 defined($_ = <$fh>) 115 or last; 116 $_ = $xfrm->($_) if defined $xfrm; 117 chomp; 118 s/[^\w\s]//g if $dict; 119 if ( $fold ) { 120 $_ = $] ge $fc_available ? fc($_) : lc($_); 121 } 122 last if $comp->($_, $key) >= 0; 123 } 124 seek($fh,$min,0); 125 $min; 126} 127 1281; 129