1package KinoSearch1::Index::FieldInfos;
2use strict;
3use warnings;
4use KinoSearch1::Util::ToolSet;
5use base qw( KinoSearch1::Util::Class Exporter );
6
# Flag masks for the per-field "fnm bits".  Each is a one-character string;
# encode_fnm_bits() combines them with | and read_infos()/decode_fnm_bits()
# test them with &, i.e. Perl's string-wise bitwise operators.
use constant INDEXED    => "\x01";
use constant VECTORIZED => "\x02";
use constant OMIT_NORMS => "\x10";
10
# Symbols available for import on request; populated in the BEGIN block below.
our @EXPORT_OK;

# Compile-time setup: declare instance variables, accessors, and exports.
BEGIN {
    # init_instance_vars() is not defined in this file (inherited); presumably
    # it registers these keys and their defaults for new objects -- confirm
    # in KinoSearch1::Util::Class.
    #   by_name   - hash of field name => Field object
    #   by_num    - array of Field objects, indexed by field number
    #   from_file - set to 1 by read_infos(); blocks further modification
    __PACKAGE__->init_instance_vars(
        # members
        by_name   => undef,
        by_num    => undef,
        from_file => 0,
    );
    # ready_get_set() is also inherited; presumably it generates getter/setter
    # methods for from_file -- confirm in the parent class.
    __PACKAGE__->ready_get_set(qw( from_file ));

    # only the flag constants may be exported
    @EXPORT_OK = qw(
        INDEXED
        VECTORIZED
        OMIT_NORMS
    );
}
28
29use KinoSearch1::Document::Field;
30
# Give the object its own empty containers: a hash of fields keyed by name
# and an array of fields indexed by number.
sub init_instance {
    my ($self) = @_;
    my ( %fields_by_name, @fields_by_num );
    $self->{by_name} = \%fields_by_name;
    $self->{by_num}  = \@fields_by_num;
}
36
# Build an independent copy of this collection.  Every field is cloned once
# and the clone is registered both by number (same order as the original) and
# by name; the from_file flag is carried over.
sub clone {
    my ($self) = @_;

    my $copy = __PACKAGE__->new;
    $copy->{from_file} = $self->{from_file};

    my ( @cloned_by_num, %cloned_by_name );
    foreach my $original ( @{ $self->{by_num} } ) {
        my $duplicate = $original->clone;
        push @cloned_by_num, $duplicate;
        $cloned_by_name{ $original->get_name } = $duplicate;
    }

    $copy->{by_num}  = \@cloned_by_num;
    $copy->{by_name} = \%cloned_by_name;
    return $copy;
}
52
# Register a user-supplied Field object under its name (replacing any field
# already stored under that name), then renumber all fields.  Croaks if the
# argument is not a Field or if this collection was read in from a file.
sub add_field {
    my ( $self, $field ) = @_;

    # only Field objects may be stored
    if ( !a_isa_b( $field, 'KinoSearch1::Document::Field' ) ) {
        croak("Not a KinoSearch1::Document::Field");
    }

    # don't mod Field objects for segments that are read back in
    if ( $self->{from_file} ) {
        croak("Can't update FieldInfos that were read in from file");
    }

    # store by name; numbers are derived from the complete set of names
    $self->{by_name}{ $field->get_name } = $field;
    return $self->_assign_field_nums;
}
68
# Return the number of fields in the segment.
sub size {
    my ($self) = @_;
    my $num_fields = @{ $self->{by_num} };
    return $num_fields;
}
71
# Return the Field objects as a list, ordered by field number.
sub get_infos {
    my ($self) = @_;
    return @{ $self->{by_num} };
}
74
# Look up the number assigned to the named field.  Returns undef when no
# field of that name is known.
sub get_field_num {
    my ( $self, $name ) = @_;

    if ( exists $self->{by_name}{$name} ) {
        my $field_num = $self->{by_name}{$name}->get_field_num;
        return $field_num;
    }

    # explicit undef (not a bare return) so list-context callers still
    # receive exactly one value
    return undef;
}
83
# Given a fieldname, return its FieldInfo (undef if the name is unknown).
sub info_by_name {
    my ( $self, $name ) = @_;
    return $self->{by_name}{$name};
}
86
# Given a field number, return its FieldInfo (undef if nothing is stored at
# that position).
sub info_by_num {
    my ( $self, $num ) = @_;
    return $self->{by_num}[$num];
}
89
# Given the field number (new, not original), return the name of the field.
# Croaks with "Don't know about field number N" when no field is stored under
# that number, or when the stored field has no defined name.
sub field_name {
    my ( $self, $num ) = @_;

    # Check the slot before calling a method on it; otherwise an unknown
    # number dies with "Can't call method on an undefined value" and the
    # intended diagnostic below is never reached.
    my $finfo = $self->{by_num}[$num];
    croak("Don't know about field number $num")
        unless defined $finfo;

    my $name = $finfo->get_name;
    croak("Don't know about field number $num")
        unless defined $name;
    return $name;
}
98
# Renumber every field: order all known fields lexically by name, store that
# ordering in by_num (reusing the existing array), and give each field its
# position as its field number.  Refuses to run on a collection read from file.
sub _assign_field_nums {
    my ($self) = @_;
    if ( $self->{from_file} ) {
        confess("Can't _assign_field_nums when from_file");
    }

    # lexical order of field names determines the numbering
    my @in_name_order
        = sort { $a->get_name cmp $b->get_name } values %{ $self->{by_name} };
    @{ $self->{by_num} } = @in_name_order;

    # a field's number is its index in the sorted list
    my $numbered = $self->{by_num};
    for my $position ( 0 .. $#{$numbered} ) {
        $numbered->[$position]->set_field_num($position);
    }
}
110
# Decode an existing .fnm file.  Reads a field count, then that many
# name/flag-bits pairs, from $instream; builds one Field per pair and appends
# it to this collection.  Marks the collection as having come from a file.
sub read_infos {
    my ( $self, $instream ) = @_;
    my $by_name = $self->{by_name};
    my $by_num  = $self->{by_num};

    # from now on this object refuses modification
    $self->{from_file} = 1;

    # pull the count, then every name/bits pair in a single read
    my $num_fields = $instream->lu_read('V');
    my @pairs      = $instream->lu_read( 'Ta' x $num_fields );

    for my $field_num ( 0 .. $num_fields - 1 ) {
        my $name = shift @pairs;
        my $bits = shift @pairs;

        # stringify so that & is applied string-wise
        my $is_indexed    = ( "$bits" & INDEXED ) eq INDEXED       ? 1 : 0;
        my $is_vectorized = ( "$bits" & VECTORIZED ) eq VECTORIZED ? 1 : 0;

        my $info = KinoSearch1::Document::Field->new(
            field_num  => $field_num,
            name       => $name,
            indexed    => $is_indexed,
            vectorized => $is_vectorized,
            fnm_bits   => $bits,
        );

        # order of storage implies lexical order by name and field number
        push @$by_num, $info;
        $by_name->{$name} = $info;
    }
}
138
# Write .fnm file: first the number of fields, then one name/flag-bits record
# per field, in field-number order.
sub write_infos {
    my ( $self, $outstream ) = @_;
    my $fields = $self->{by_num};

    # header: how many records follow
    my $num_fields = @$fields;
    $outstream->lu_write( 'V', $num_fields );

    # one record per field
    foreach my $field (@$fields) {
        $outstream->lu_write( 'Ta', $field->get_name, $field->get_fnm_bits );
    }
}
148
# Merge the fields of one or more other FieldInfos objects into this one,
# redefining fields as necessary and generating new field numbers.
#
# For every field in each of @others: if this object already has a field of
# that name, the stored field is replaced by the result of the other field's
# breed_with(); otherwise a clone of the other field is stored.  All fields
# are renumbered afterwards.
sub consolidate {
    my ( $self, @others ) = @_;
    my $infos = $self->{by_name};

    # Make *this* finfos the master FieldInfos object
    for my $other (@others) {

        # Iterate over keys() rather than each(): each() resumes from the
        # hash's shared iterator, so an iteration left part-way earlier would
        # make this loop silently skip fields.  keys() resets the iterator.
        for my $name ( keys %{ $other->{by_name} } ) {
            my $other_finfo = $other->{by_name}{$name};
            if ( exists $infos->{$name} ) {
                $infos->{$name} = $other_finfo->breed_with( $infos->{$name} );
            }
            else {
                $infos->{$name} = $other_finfo->clone;
            }
        }
    }

    $self->_assign_field_nums;
}
169
# Generate a mapping of field numbers between two FieldInfos objects.  Should
# be called by the superset.
#
# For each field of $other, in $other's field-number order, the number of the
# same-named field in this object is packed as a native unsigned int ('I').
# The packed string is handed to KinoSearch1::Util::IntMap->new by reference.
# Croaks if $other contains a field this object does not know about.
sub generate_field_num_map {
    my ( $self, $other ) = @_;
    my $map = '';
    for my $other_finfo ( @{ $other->{by_num} } ) {
        my $name       = $other_finfo->get_name;
        my $orig_finfo = $self->{by_name}{$name};

        # The invocant is not a superset of $other: fail with a message that
        # names the field instead of a method call on undef.
        croak("Don't know about field '$name'")
            unless defined $orig_finfo;

        $map .= pack( 'I', $orig_finfo->get_field_num );
    }
    return KinoSearch1::Util::IntMap->new( \$map );
}
181
# Build the one-character flag string for a field.  Starts from "\0" and ORs
# in INDEXED, VECTORIZED and OMIT_NORMS (string-wise) for each property the
# field reports as true.  The invocant is ignored.
sub encode_fnm_bits {
    my ( undef, $field ) = @_;
    my $bits = "\0";
    $bits |= INDEXED    if $field->get_indexed;
    $bits |= VECTORIZED if $field->get_vectorized;
    $bits |= OMIT_NORMS if $field->get_omit_norms;
    return $bits;
}
192
# Apply a flag string to a field: calls set_indexed, set_vectorized and
# set_omit_norms on $field with a true value when the corresponding flag is
# present in $bits and a false value otherwise.  The invocant is ignored.
sub decode_fnm_bits {
    my ( undef, $field, $bits ) = @_;

    # Work on a fresh string copy, as read_infos() does.  If the caller's
    # scalar was ever used in numeric context, & would otherwise operate
    # numerically and every flag would test false.
    my $flags = "$bits";

    $field->set_indexed(    ( $flags & INDEXED )    eq INDEXED );
    $field->set_vectorized( ( $flags & VECTORIZED ) eq VECTORIZED );
    $field->set_omit_norms( ( $flags & OMIT_NORMS ) eq OMIT_NORMS );
}
199
# Intentionally does nothing and returns nothing.
sub close {
    return;
}
201
2021;
203
204__END__
205
206==begin devdocs
207
208==head1 NAME
209
210KinoSearch1::Index::FieldInfos - track field characteristics
211
212==head1 SYNOPSIS
213
214    my $finfos = KinoSearch1::Index::FieldInfos->new;
215    $finfos->read_infos($instream);
216
217==head1 DESCRIPTION
218
219A FieldInfos object tracks the characteristics of all fields in a given
220segment.
221
222KinoSearch1 counts on having field nums assigned to fields by lexically sorted
223order of field names, but indexes generated by Java Lucene are not likely to
224have this property.
225
226==head1 COPYRIGHT
227
228Copyright 2005-2010 Marvin Humphrey
229
230==head1 LICENSE, DISCLAIMER, BUGS, etc.
231
232See L<KinoSearch1> version 1.01.
233
234==end devdocs
235==cut
236
237