package KinoSearch1::Index::FieldInfos;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Util::Class Exporter );

# One-byte flag strings describing a field.  They are combined and tested
# with Perl's *string* bitwise operators (see encode_fnm_bits,
# decode_fnm_bits and read_infos), so they must stay strings, not numbers.
use constant INDEXED    => "\x01";
use constant VECTORIZED => "\x02";
use constant OMIT_NORMS => "\x10";

our @EXPORT_OK;

BEGIN {
    __PACKAGE__->init_instance_vars(
        # members
        by_name   => undef,    # hashref: field name   => Field object
        by_num    => undef,    # arrayref: field number => Field object
        from_file => 0,        # true once read_infos() has populated us
    );
    __PACKAGE__->ready_get_set(qw( from_file ));

    @EXPORT_OK = qw(
        INDEXED
        VECTORIZED
        OMIT_NORMS
    );
}

use KinoSearch1::Document::Field;

# Give the object its own empty containers.
# NOTE(review): presumably invoked by the KinoSearch1::Util::Class
# constructor -- confirm against that class.
sub init_instance {
    my $self = shift;
    $self->{by_name} = {};
    $self->{by_num}  = [];
}

# Return a new FieldInfos holding a clone of every Field in this one.  The
# by_num ordering and the from_file flag are carried over.
sub clone {
    my $self      = shift;
    my $evil_twin = __PACKAGE__->new;
    $evil_twin->{from_file} = $self->{from_file};
    my @by_num;
    my %by_name;
    for my $finfo ( @{ $self->{by_num} } ) {
        my $dupe = $finfo->clone;

        # the same cloned object is reachable by number and by name
        push @by_num, $dupe;
        $by_name{ $finfo->get_name } = $dupe;
    }
    $evil_twin->{by_num}  = \@by_num;
    $evil_twin->{by_name} = \%by_name;
    return $evil_twin;
}

# Add a user-supplied Field object to the collection.  A field that shares
# its name with an existing entry replaces that entry.  Croaks if the
# argument is not a KinoSearch1::Document::Field or if this object was read
# in from a file.
sub add_field {
    my ( $self, $field ) = @_;
    croak("Not a KinoSearch1::Document::Field")
        unless a_isa_b( $field, 'KinoSearch1::Document::Field' );

    # don't mod Field objects for segments that are read back in
    croak("Can't update FieldInfos that were read in from file")
        if $self->{from_file};

    # add the field
    my $fieldname = $field->get_name;
    $self->{by_name}{$fieldname} = $field;

    # every addition renumbers all fields so numbers track name order
    $self->_assign_field_nums;
}

# Return the number of fields in the segment.
sub size { scalar @{ $_[0]->{by_num} } }

# Return a list of the Field objects.
sub get_infos { @{ $_[0]->{by_num} } }

# Given a fieldname, return its number.
sub get_field_num {
    my ( $self, $name ) = @_;

    # Explicit undef (rather than a bare return) is kept on purpose so that
    # callers evaluating this in list context still receive one element.
    return undef
        unless exists $self->{by_name}{$name};
    my $num = $self->{by_name}{$name}->get_field_num;
    return $num;
}

# Given a fieldname, return its FieldInfo.
sub info_by_name { $_[0]->{by_name}{ $_[1] } }

# Given a field number, return its fieldInfo.
sub info_by_num { $_[0]->{by_num}[ $_[1] ] }

# Given the field number (new, not original), return the name of the field.
# Croaks if no field is stored under that number or the field has no name.
sub field_name {
    my ( $self, $num ) = @_;

    # Look the slot up first: calling get_name on an empty slot would die
    # with "Can't call method ... on an undefined value" before the intended
    # diagnostic below could ever be raised.
    my $finfo = $self->{by_num}[$num];
    my $name = defined $finfo ? $finfo->get_name : undef;
    croak("Don't know about field number $num")
        unless defined $name;
    return $name;
}

# Sort all the fields lexically by name and assign ascending numbers.
# Rebuilds by_num from the values of by_name.  Confesses if this object was
# read in from a file.
sub _assign_field_nums {
    my $self = shift;
    confess("Can't _assign_field_nums when from_file") if $self->{from_file};

    # assign field nums according to lexical order of field names
    @{ $self->{by_num} }
        = sort { $a->get_name cmp $b->get_name } values %{ $self->{by_name} };
    my $inc = 0;
    $_->set_field_num( $inc++ ) for @{ $self->{by_num} };
}

# Decode an existing .fnm file.  Reads a field count followed by that many
# (name, bits) pairs from $instream, builds a Field for each pair and files
# it under both by_name and by_num.
sub read_infos {
    my ( $self, $instream ) = @_;
    my ( $by_name, $by_num ) = @{$self}{qw( by_name by_num )};

    # set flag indicating that this FieldInfos object has been read in
    $self->{from_file} = 1;

    # read in infos from stream
    my $num_fields     = $instream->lu_read('V');
    my @names_and_bits = $instream->lu_read( 'Ta' x $num_fields );
    my $field_num      = 0;
    while ( $field_num < $num_fields ) {
        my ( $name, $bits ) = splice( @names_and_bits, 0, 2 );

        # "$bits" forces the string form so that & is a string bitwise op
        my $info = KinoSearch1::Document::Field->new(
            field_num  => $field_num,
            name       => $name,
            indexed    => ( "$bits" & INDEXED ) eq INDEXED ? 1 : 0,
            vectorized => ( "$bits" & VECTORIZED ) eq VECTORIZED ? 1 : 0,
            fnm_bits   => $bits,
        );
        $by_name->{$name} = $info;

        # order of storage implies lexical order by name and field number
        push @$by_num, $info;
        $field_num++;
    }
}

# Write .fnm file: the field count, then each field's name and fnm bits in
# field-number order.
sub write_infos {
    my ( $self, $outstream ) = @_;

    $outstream->lu_write( 'V', scalar @{ $self->{by_num} } );
    for my $finfo ( @{ $self->{by_num} } ) {
        $outstream->lu_write( 'Ta', $finfo->get_name, $finfo->get_fnm_bits, );
    }
}

# Merge other FieldInfos objects into this one, redefining fields as
# necessary and generating new field numbers.  Confesses, before touching
# anything, if this object was read in from a file.
sub consolidate {
    my ( $self, @others ) = @_;

    # _assign_field_nums refuses to run on a from_file object; fail up front
    # so a rejected merge cannot leave by_name and by_num out of sync.
    confess("Can't consolidate FieldInfos that were read in from file")
        if $self->{from_file};

    my $infos = $self->{by_name};

    # Make *this* finfos the master FieldInfos object
    for my $other (@others) {
        my $other_infos = $other->{by_name};

        # keys() starts from a fresh iterator; each() would resume from
        # wherever an earlier, abandoned iteration of this hash stopped.
        for my $name ( keys %$other_infos ) {
            my $other_finfo = $other_infos->{$name};
            if ( exists $infos->{$name} ) {
                $infos->{$name} = $other_finfo->breed_with( $infos->{$name} );
            }
            else {
                $infos->{$name} = $other_finfo->clone;
            }
        }
    }

    $self->_assign_field_nums;
}

# Generate a mapping of field numbers between two FieldInfos objects.  Should
# be called by the superset.
sub generate_field_num_map {
    my ( $self, $other ) = @_;
    my $map = '';

    # One packed 'I' integer per field of $other, in $other's numbering
    # order, holding the number the same-named field has in $self.
    for my $other_finfo ( @{ $other->{by_num} } ) {
        my $name       = $other_finfo->get_name;
        my $orig_finfo = $self->{by_name}{$name};

        # $self is expected to know every field of $other; report a missing
        # one by name instead of dying on an undefined-value method call.
        croak("Can't map field '$name': not present in this FieldInfos")
            unless defined $orig_finfo;
        $map .= pack( 'I', $orig_finfo->get_field_num );
    }
    return KinoSearch1::Util::IntMap->new( \$map );
}

# Build the one-byte flag string for a Field from its indexed, vectorized
# and omit_norms settings.  The invocant is ignored.
sub encode_fnm_bits {
    my ( undef, $field ) = @_;
    my $bits = "\0";

    # both operands are strings, so |= is a string bitwise OR
    $bits |= INDEXED    if $field->get_indexed;
    $bits |= VECTORIZED if $field->get_vectorized;
    $bits |= OMIT_NORMS if $field->get_omit_norms;
    return $bits;
}

# Set a Field's indexed, vectorized and omit_norms settings from a flag
# string.  The invocant is ignored.
sub decode_fnm_bits {
    my ( undef, $field, $bits ) = @_;

    # Stringify first, as read_infos does, so that & is always the string
    # bitwise operator even if $bits has picked up a numeric value.
    my $bit_string = "$bits";
    $field->set_indexed( ( $bit_string & INDEXED ) eq INDEXED );
    $field->set_vectorized( ( $bit_string & VECTORIZED ) eq VECTORIZED );
    $field->set_omit_norms( ( $bit_string & OMIT_NORMS ) eq OMIT_NORMS );
}

# Nothing to release; the method body is intentionally empty.
sub close { }

1;

__END__

==begin devdocs

==head1 NAME

KinoSearch1::Index::FieldInfos - track field characteristics

==head1 SYNOPSIS

    my $finfos = KinoSearch1::Index::FieldInfos->new;
    $finfos->read_infos($instream);

==head1 DESCRIPTION

A FieldInfos object tracks the characteristics of all fields in a given
segment.

KinoSearch1 counts on having field nums assigned to fields by lexically sorted
order of field names, but indexes generated by Java Lucene are not likely to
have this property.

==head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

==head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.01.

==end devdocs
==cut
