package KinoSearch1::Index::IndexReader;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Util::Class );

# NOTE(review): this BEGIN block runs before the reader classes below are
# loaded; presumably they depend on the instance vars being registered
# first -- confirm before reordering any of these statements.
BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params / members
        invindex       => undef,
        seg_infos      => undef,
        close_invindex => 1,
        invindex_owner => 1,
    );
    __PACKAGE__->ready_get(qw( invindex ));
}

use KinoSearch1::Store::FSInvIndex;
use KinoSearch1::Index::SegReader;
use KinoSearch1::Index::MultiReader;
use KinoSearch1::Index::SegInfos;
use KinoSearch1::Index::IndexFileNames qw(
    WRITE_LOCK_NAME WRITE_LOCK_TIMEOUT
    COMMIT_LOCK_NAME COMMIT_LOCK_TIMEOUT
);

=begin comment

    my $reader = KinoSearch1::Index::IndexReader->new(
        invindex => $invindex_or_path,
    );

Constructor.  A temporary object is built by the parent class's constructor;
what the caller receives is whatever _open_multi_or_segreader returns for it.

=end comment
=cut

sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);
    return $self->_open_multi_or_segreader;
}

# Hand back a concrete reader for the invindex: the lone SegReader when the
# invindex holds exactly one segment, a MultiReader wrapping all of the
# SegReaders in every other case.
sub _open_multi_or_segreader {
    my $self = shift;

    # Anything that isn't already an InvIndex object is treated as a
    # filepath and wrapped in an FSInvIndex.
    my $invindex = $self->{invindex};
    if ( !a_isa_b( $invindex, 'KinoSearch1::Store::InvIndex' ) ) {
        $invindex = KinoSearch1::Store::FSInvIndex->new( path => $invindex );
        $self->{invindex} = $invindex;
    }

    # The closure below stores its result here.
    my $reader;

    my $open_readers = sub {
        # read the segments file
        my $seg_infos = KinoSearch1::Index::SegInfos->new;
        $seg_infos->read_infos($invindex);

        # one SegReader per segment listed in the invindex
        my @sub_readers;
        for my $seg_info ( $seg_infos->infos ) {
            push @sub_readers,
                KinoSearch1::Index::SegReader->new(
                seg_name => $seg_info->get_seg_name,
                invindex => $invindex,
                );
        }

        # a single SegReader is used directly; anything else gets wrapped
        $reader
            = @sub_readers == 1
            ? $sub_readers[0]
            : KinoSearch1::Index::MultiReader->new(
            invindex    => $invindex,
            sub_readers => \@sub_readers,
            );
    };

    # Read the segments data and open the readers under the commit lock.
    $invindex->run_while_locked(
        lock_name => COMMIT_LOCK_NAME,
        timeout   => COMMIT_LOCK_TIMEOUT,
        do_body   => $open_readers,
    );

    return $reader;
}

# Abstract interface.  In this base class each method below does nothing but
# call abstract_death; the documented behavior is the contract for subclasses.

=begin comment

    my $num = $reader->max_doc;

Return the highest document number available to the reader.

=end comment
=cut

sub max_doc {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $num = $reader->num_docs;

Return the number of (non-deleted) documents available to the reader.

=end comment
=cut

sub num_docs {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $term_docs = $reader->term_docs($term);

Given a Term, return a TermDocs subclass.

=end comment
=cut

sub term_docs {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $norms_reader = $reader->norms_reader($field_name);

Given a field name, return a NormsReader object.

=end comment
=cut

sub norms_reader {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    $reader->delete_docs_by_term( $term );

Delete all the documents available to the reader that index the given Term.

=end comment
=cut

sub delete_docs_by_term {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    $boolean = $reader->has_deletions

Return true if any documents have been marked as deleted.

=end comment
=cut

sub has_deletions {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $enum = $reader->terms($term);

Given a Term, return a TermEnum subclass.  The Enum will be pre-located via
$enum->seek($term) to the right spot.

=end comment
=cut

sub terms {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $field_names = $reader->get_field_names(
        indexed => $indexed_fields_only,
    );

Return a hashref which is a list of field names.  If the parameter 'indexed'
is true, return only the names of fields which are indexed.

=end comment
=cut

sub get_field_names {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my $infos = $reader->generate_field_infos;

Return a new FieldInfos object, describing all the fields held by the reader.
The FieldInfos object will be consolidated, and thus may not be representative
of every field in every segment if there are conflicting definitions.

=end comment
=cut

sub generate_field_infos {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    my @sparse_segreaders = $reader->segreaders_to_merge;
    my @all_segreaders    = $reader->segreaders_to_merge('all');

Find segments which are good candidates for merging, as they don't contain
many valid documents.  Returns an array of SegReaders.  If passed an argument,
return all SegReaders.

=end comment
=cut

sub segreaders_to_merge {
    my $self = shift;
    return $self->abstract_death;
}

=begin comment

    $reader->close;

Release all resources.

=end comment
=cut

sub close {
    my $self = shift;
    return $self->abstract_death;
}

1;

__END__

==begin devdocs

==head1 NAME

KinoSearch1::Index::IndexReader - base class for objects which read invindexes

==head1 DESCRIPTION

There are two subclasses of the abstract base class IndexReader: SegReader,
which reads a single segment, and MultiReader, which condenses the output of
several SegReaders.  Since each segment is a self-contained inverted index, a
SegReader is in effect a complete index reader.

The constructor for IndexReader returns either a SegReader if the index has
only one segment, or a MultiReader if there are multiple segments.

==head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

==head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.01.

==end devdocs
==cut
