1package KinoSearch1::Index::FieldsWriter;
2use strict;
3use warnings;
4use KinoSearch1::Util::ToolSet;
5use base qw( KinoSearch1::Util::Class );
6
# declare this class's instance variables at compile time
BEGIN {
    my @instance_vars = (

        # constructor params / members
        invindex => undef,
        seg_name => undef,

        # members; both are assigned by init_instance
        fdata_stream  => undef,
        findex_stream => undef,
    );
    __PACKAGE__->init_instance_vars(@instance_vars);
}
17use Compress::Zlib qw( compress );
18
# Prepare the two output streams for this segment.
#
# Reads invindex and seg_name from the object, removes any existing
# "<seg_name>.fdx" / "<seg_name>.fdt" files from the invindex, then opens
# fresh outstreams and stores them in findex_stream and fdata_stream.
sub init_instance {
    my $self = shift;
    my ( $invindex, $seg_name ) = @{$self}{qw( invindex seg_name )};

    my $index_file = "$seg_name.fdx";
    my $data_file  = "$seg_name.fdt";

    # clear out leftovers, index file first, before opening anything
    foreach my $filename ( $index_file, $data_file ) {
        if ( $invindex->file_exists($filename) ) {
            $invindex->delete_file($filename);
        }
    }

    # open the index stream, then the data stream
    $self->{findex_stream} = $invindex->open_outstream($index_file);
    $self->{fdata_stream}  = $invindex->open_outstream($data_file);
}
32
# Write one document's stored fields to the field data stream and record
# the record's starting file pointer in the field index stream.
#
# Params:
#   $doc - object whose get_fields returns field objects answering
#          get_stored, get_field_num, get_fdt_bits, get_compressed,
#          get_value and get_tv_string.
#
# Record layout: a 'V' count of stored fields, then 'VaTT' per field
# (field number, flag bits, value, tv string), ordered by field number.
#
# Croaks if a field flagged as compressed cannot be compressed.  The whole
# record is assembled before anything is written, so a failure leaves both
# streams untouched.
sub add_doc {
    my ( $self, $doc ) = @_;

    # only store fields marked as "stored"
    my @stored = sort { $a->get_field_num <=> $b->get_field_num }
        grep { $_->get_stored } $doc->get_fields;

    # add the number of stored fields in the Doc
    my @to_write = ( scalar @stored );

    # add flag bits and value for each stored field
    for my $field (@stored) {
        push @to_write, ( $field->get_field_num, $field->get_fdt_bits );
        if ( $field->get_compressed ) {

            # compress() returns undef on failure; writing that out would
            # store a bogus value under flag bits that claim compression
            my $deflated = compress( $field->get_value );
            if ( !defined $deflated ) {
                require Carp;
                Carp::croak( "Failed to compress value of field number "
                        . $field->get_field_num );
            }
            push @to_write, $deflated;
        }
        else {
            push @to_write, $field->get_value;
        }
        push @to_write, $field->get_tv_string;
    }

    # record the data stream's current file pointer in the index, but only
    # now that the record is known to be complete
    $self->{findex_stream}->lu_write( 'Q', $self->{fdata_stream}->tell );

    # write out data
    my $lu_template = 'V' . ( 'VaTT' x scalar @stored );
    $self->{fdata_stream}->lu_write( $lu_template, @to_write );
}
59
# Copy the stored fields of every doc in an existing segment for which
# $doc_map has a defined entry into this writer's streams.
#
# Params:
#   $seg_reader    - supplies max_doc and get_fields_reader
#   $doc_map       - get($orig_doc_num) returns undef for docs to skip
#   $field_num_map - get($field_num) supplies the field number to write
sub add_segment {
    my ( $self, $seg_reader, $doc_map, $field_num_map ) = @_;
    my $findex_stream = $self->{findex_stream};
    my $fdata_stream  = $self->{fdata_stream};
    my $fields_reader = $seg_reader->get_fields_reader;

    # nothing to copy from an empty segment
    my $doc_count = $seg_reader->max_doc;
    return unless $doc_count;

    DOC: for my $orig_doc_num ( 0 .. $doc_count - 1 ) {

        # docs without a defined mapping are deleted; leave them behind
        next DOC unless defined $doc_map->get($orig_doc_num);

        # the index entry points at where this doc's record will begin
        $findex_stream->lu_write( 'Q', $fdata_stream->tell );

        # pull the raw field data for the whole doc in one go
        my ( $num_fields, $all_data )
            = $fields_reader->fetch_raw($orig_doc_num);

        # record header: number of fields
        $fdata_stream->lu_write( 'V', $num_fields );

        # each field takes four items off the front of the raw data; only
        # the field number is translated, the rest is copied as-is
        for my $nth_field ( 1 .. $num_fields ) {
            my ( $field_num, @payload ) = splice( @$all_data, 0, 4 );
            $fdata_stream->lu_write(
                'VaTT',
                $field_num_map->get($field_num),
                @payload,
            );
        }
    }
}
90
# close the field data stream, then the field index stream
sub finish {
    my $self = shift;
    my ( $data_out, $index_out )
        = @{$self}{qw( fdata_stream findex_stream )};
    $data_out->close;
    $index_out->close;
}
96
971;
98
99__END__
100
=begin devdocs

=head1 NAME

KinoSearch1::Index::FieldsWriter - write stored fields to an invindex

=head1 DESCRIPTION

FieldsWriter writes fields which are marked as stored to the field data and
field index files.

=head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.01.

=end devdocs
=cut
122
123