package KinoSearch1::Index::FieldsWriter;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Util::Class );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params / members
        invindex => undef,
        seg_name => undef,
        # members
        fdata_stream  => undef,
        findex_stream => undef,
    );
}
use Compress::Zlib qw( compress );

# Prepare the two output streams for this segment.
#
# Any pre-existing "<seg_name>.fdx" / "<seg_name>.fdt" file in the invindex
# is deleted first; then the index stream (.fdx) and the data stream (.fdt)
# are opened, in that order, and stored on the object.
sub init_instance {
    my $self     = shift;
    my $invindex = $self->{invindex};
    my $seg_name = $self->{seg_name};

    my $index_filename = $seg_name . '.fdx';
    my $data_filename  = $seg_name . '.fdt';

    # clear out leftovers so that the streams start from scratch.
    for my $filename ( $index_filename, $data_filename ) {
        next unless $invindex->file_exists($filename);
        $invindex->delete_file($filename);
    }

    $self->{findex_stream} = $invindex->open_outstream($index_filename);
    $self->{fdata_stream}  = $invindex->open_outstream($data_filename);
}

# Write the stored fields of one document.
#
# $doc must supply get_fields(); each field must supply get_stored,
# get_field_num, get_fdt_bits, get_compressed, get_value and get_tv_string.
#
# The index stream receives the data stream's position as it was before
# the document was written.  The data stream receives the count of stored
# fields, followed for each stored field (ordered by ascending field
# number) by: field number, fdt bits, value (run through compress() when
# the field is flagged as compressed), and tv string.
sub add_doc {
    my ( $self, $doc ) = @_;
    my ( $findex_stream, $fdata_stream )
        = @{$self}{qw( findex_stream fdata_stream )};

    # the index entry points at the spot where this doc's data begins.
    $findex_stream->lu_write( 'Q', $fdata_stream->tell );

    # fields which aren't marked as "stored" are left out entirely.
    my @stored = sort { $a->get_field_num <=> $b->get_field_num }
        grep { $_->get_stored } $doc->get_fields;
    my $num_stored = scalar @stored;

    # the field count leads, then four items per stored field.
    my @to_write = ($num_stored);
    for my $field (@stored) {
        push @to_write, $field->get_field_num, $field->get_fdt_bits;
        if ( $field->get_compressed ) {
            push @to_write, compress( $field->get_value );
        }
        else {
            push @to_write, $field->get_value;
        }
        push @to_write, $field->get_tv_string;
    }

    # one 'V' for the count plus one 'VaTT' group per stored field.
    my $lu_template = 'V' . ( 'VaTT' x $num_stored );
    $fdata_stream->lu_write( $lu_template, @to_write );
}

# Copy the stored fields of an existing segment into this one.
#
# $seg_reader    - supplies get_fields_reader() and max_doc().
# $doc_map       - get($orig_doc_num) must be defined for every doc which
#                  is to be carried over; other docs are skipped.
# $field_num_map - get($field_num) yields the field number to write in
#                  place of the one read from the old segment.
#
# Returns early, writing nothing, when max_doc is false.
sub add_segment {
    my ( $self, $seg_reader, $doc_map, $field_num_map ) = @_;
    my $findex_stream = $self->{findex_stream};
    my $fdata_stream  = $self->{fdata_stream};
    my $fields_reader = $seg_reader->get_fields_reader;

    my $doc_count = $seg_reader->max_doc;
    return unless $doc_count;

    for my $orig ( 0 .. $doc_count - 1 ) {
        # docs with no mapping (i.e. deleted docs) don't get copied.
        next if !defined $doc_map->get($orig);

        # index entry: where this doc's data starts in the data stream.
        $findex_stream->lu_write( 'Q', $fdata_stream->tell );

        # fetch_raw hands back a field count plus a reference to a flat
        # array, which gets consumed four items at a time below.
        my ( $num_fields, $all_data ) = $fields_reader->fetch_raw($orig);

        $fdata_stream->lu_write( 'V', $num_fields );

        for my $nth_field ( 1 .. $num_fields ) {
            # first item is the old field number; the rest is passed
            # through untouched.
            my @raw_field      = splice( @{$all_data}, 0, 4 );
            my $orig_field_num = shift @raw_field;
            $fdata_stream->lu_write( 'VaTT',
                $field_num_map->get($orig_field_num), @raw_field );
        }
    }
}

# Close both output streams: the data stream first, then the index stream.
sub finish {
    my $self = shift;
    my ( $fdata_stream, $findex_stream )
        = @{$self}{qw( fdata_stream findex_stream )};
    $fdata_stream->close;
    $findex_stream->close;
}

1;

__END__

==begin devdocs

==head1 NAME

KinoSearch1::Index::FieldsWriter - write stored fields to an invindex

==head1 DESCRIPTION

FieldsWriter writes fields which are marked as stored to the field data and
field index files.

==head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

==head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.01.

==end devdocs
==cut
