package KinoSearch1::Index::MultiTermDocs;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Index::TermDocs );

BEGIN {
    __PACKAGE__->init_instance_vars(
        sub_readers => undef,
        starts      => undef,
    );
}
our %instance_vars;

# Constructor.  Accepts the labeled params declared above:
#   sub_readers - array ref of objects which respond to term_docs()
#   starts      - array ref of doc number offsets, one per sub reader
# Confesses if verify_args() rejects the supplied arguments.
sub new {
    my $self = shift->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = ( %instance_vars, @_ );

    # get a SegTermDocs for each segment
    my $sub_readers   = $args{sub_readers} || [];
    my $starts        = $args{starts}      || [];
    my @sub_term_docs = map { $_->term_docs } @$sub_readers;
    _init_child( $self, \@sub_term_docs, $starts );

    return $self;
}

# Seek every sub TermDocs to $term, then rewind this object so that
# iteration starts over from the first sub TermDocs.
sub seek {
    my ( $self, $term ) = @_;
    $_->seek($term) for @{ $self->_get_sub_term_docs };
    $self->_reset_pointer;
}

# Forward $val to set_read_positions() on every sub TermDocs.
sub set_read_positions {
    my ( $self, $val ) = @_;
    $_->set_read_positions($val) for @{ $self->_get_sub_term_docs };
}

# Close every sub TermDocs.
sub close {
    my $self = shift;
    $_->close for @{ $self->_get_sub_term_docs };
}

1;

__END__

__XS__

MODULE = KinoSearch1    PACKAGE = KinoSearch1::Index::MultiTermDocs

=for comment
Helper for new().  Hands the array of sub TermDocs and the array of start
offsets to the C-level initializer.

=cut

void
_init_child(term_docs, sub_term_docs_avref, starts_av)
    TermDocs *term_docs;
    SV       *sub_term_docs_avref;
    AV       *starts_av;
PPCODE:
    Kino1_MultiTermDocs_init_child(term_docs, sub_term_docs_avref, starts_av);


=for comment
Helper for seek().

=cut

void
_reset_pointer(term_docs)
    TermDocs *term_docs;
PREINIT:
    MultiTermDocsChild *child;
PPCODE:
    child = (MultiTermDocsChild*)term_docs->child;
    child->base    = 0;
    child->pointer = 0;
    child->current = NULL;


=for comment
Accessor for sub_term_docs.  Setting is not allowed and confesses; getting
returns a copy of the stored array reference.

=cut

SV*
_set_or_get(term_docs, ...)
    TermDocs *term_docs;
ALIAS:
    _set_sub_term_docs = 1
    _get_sub_term_docs = 2
CODE:
{
    MultiTermDocsChild *child = (MultiTermDocsChild*)term_docs->child;

    KINO_START_SET_OR_GET_SWITCH

    case 1:  Kino1_confess("Can't set sub_term_docs");
             /* fall through */
    case 2:  RETVAL = newSVsv( child->sub_term_docs_avref );
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL

__H__

#ifndef H_KINO_MULTI_TERM_DOCS
#define H_KINO_MULTI_TERM_DOCS 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1IndexTermDocs.h"
#include "KinoSearch1UtilCClass.h"
#include "KinoSearch1UtilMemManager.h"

typedef struct multitermdocschild {
    I32        num_subs;            /* number of sub TermDocs */
    I32        base;                /* start offset of the current sub */
    I32        pointer;             /* index of the next sub to visit */
    SV        *sub_term_docs_avref; /* copy of the Perl array reference */
    U32       *starts;              /* start offset for each sub */
    SV        *term_sv;             /* not referenced in this file */
    TermDocs **sub_term_docs;       /* C structs extracted from the array */
    TermDocs  *current;             /* sub being iterated, or NULL */
} MultiTermDocsChild;

void Kino1_MultiTermDocs_init_child(TermDocs*, SV*, AV*);
void Kino1_MultiTermDocs_set_doc_freq_death(TermDocs*, U32);
U32  Kino1_MultiTermDocs_get_doc_freq(TermDocs*);
U32  Kino1_MultiTermDocs_get_doc(TermDocs*);
U32  Kino1_MultiTermDocs_get_freq(TermDocs*);
SV*  Kino1_MultiTermDocs_get_positions(TermDocs*);
U32  Kino1_MultiTermDocs_bulk_read(TermDocs*, SV*, SV*, U32);
bool Kino1_MultiTermDocs_next(TermDocs*);
bool Kino1_MultiTermDocs_skip_to(TermDocs*, U32);
void Kino1_MultiTermDocs_destroy(TermDocs*);

#endif /* include guard */

__C__

#include "KinoSearch1IndexMultiTermDocs.h"

/* Allocate the MultiTermDocsChild for [term_docs], populate it from the
 * supplied Perl array reference of sub TermDocs and the array of start
 * offsets, and install the MultiTermDocs method pointers.
 *
 * Confesses if [sub_term_docs_avref] is not an array reference, or if
 * either array has fewer valid members than there are sub TermDocs.
 */
void
Kino1_MultiTermDocs_init_child(TermDocs* term_docs, SV *sub_term_docs_avref,
                               AV *starts_av) {
    MultiTermDocsChild *child;
    I32                 i;
    SV                **sv_ptr;
    AV                 *sub_term_docs_av;

    /* validate before allocating, so that SvRV is never applied to a
     * non-reference and nothing has been allocated if we bail out here */
    if (   !SvROK(sub_term_docs_avref)
        || SvTYPE(SvRV(sub_term_docs_avref)) != SVt_PVAV
    ) {
        Kino1_confess("sub_term_docs must be an array reference");
    }

    /* allocate */
    Kino1_New(0, child, 1, MultiTermDocsChild);
    term_docs->child = child;

    /* assign */
    child->current = NULL;
    child->base    = 0;
    child->pointer = 0;

    /* extract AV* and take stock of how many sub-TermDocs we've got */
    child->sub_term_docs_avref = newSVsv(sub_term_docs_avref);
    sub_term_docs_av = (AV*)SvRV(sub_term_docs_avref);
    child->num_subs  = av_len(sub_term_docs_av) + 1;

    /* extract starts from starts array, subTermDocs from the subs array */
    Kino1_New(0, child->starts, child->num_subs, U32);
    Kino1_New(0, child->sub_term_docs, child->num_subs, TermDocs*);
    for (i = 0; i < child->num_subs; i++) {
        sv_ptr = av_fetch(starts_av, i, 0);
        if (sv_ptr == NULL)
            Kino1_confess("starts array doesn't have enough valid members");
        child->starts[i] = (U32)SvUV(*sv_ptr);
        sv_ptr = av_fetch(sub_term_docs_av, i, 0);
        if (sv_ptr == NULL)
            Kino1_confess("TermDocs array doesn't have enough valid members");
        Kino1_extract_struct(*sv_ptr, child->sub_term_docs[i], TermDocs*,
            "KinoSearch1::Index::TermDocs");
    }

    /* assign method pointers */
    term_docs->set_doc_freq  = Kino1_MultiTermDocs_set_doc_freq_death;
    term_docs->get_doc_freq  = Kino1_MultiTermDocs_get_doc_freq;
    term_docs->get_doc       = Kino1_MultiTermDocs_get_doc;
    term_docs->get_freq      = Kino1_MultiTermDocs_get_freq;
    term_docs->get_positions = Kino1_MultiTermDocs_get_positions;
    term_docs->bulk_read     = Kino1_MultiTermDocs_bulk_read;
    term_docs->next          = Kino1_MultiTermDocs_next;
    term_docs->skip_to       = Kino1_MultiTermDocs_skip_to;
    term_docs->destroy       = Kino1_MultiTermDocs_destroy;
}

/* Setting doc_freq is not supported: always confesses.
 */
void
Kino1_MultiTermDocs_set_doc_freq_death(TermDocs *term_docs, U32 doc_freq) {
    Kino1_confess("can't set doc_freq on a MultiTermDocs");
}

/* Return the sum of the doc_freqs reported by all sub TermDocs.
 */
U32
Kino1_MultiTermDocs_get_doc_freq(TermDocs *term_docs) {
    MultiTermDocsChild *child;
    TermDocs           *sub_td;
    I32                 i;
    U32                 doc_freq = 0;

    /* sum the doc_freqs of all segments */
    child = (MultiTermDocsChild*)term_docs->child;
    for (i = 0; i < child->num_subs; i++) {
        sub_td = child->sub_term_docs[i];
        doc_freq += sub_td->get_doc_freq(sub_td);
    }
    return doc_freq;
}

/* Return the current sub TermDocs' doc number plus the current base, or
 * KINO_TERM_DOCS_SENTINEL if there is no current sub TermDocs.
 */
U32
Kino1_MultiTermDocs_get_doc(TermDocs *term_docs) {
    MultiTermDocsChild *child;
    child = (MultiTermDocsChild*)term_docs->child;

    if (child->current == NULL)
        return KINO_TERM_DOCS_SENTINEL;

    return child->current->get_doc(child->current) + child->base;
}

/* Return the current sub TermDocs' freq, or KINO_TERM_DOCS_SENTINEL if
 * there is no current sub TermDocs.
 */
U32
Kino1_MultiTermDocs_get_freq(TermDocs *term_docs) {
    MultiTermDocsChild *child;
    child = (MultiTermDocsChild*)term_docs->child;

    if (child->current == NULL)
        return KINO_TERM_DOCS_SENTINEL;

    return child->current->get_freq(child->current);
}

/* Return the current sub TermDocs' positions, or &PL_sv_undef if there is
 * no current sub TermDocs.
 */
SV*
Kino1_MultiTermDocs_get_positions(TermDocs *term_docs) {
    MultiTermDocsChild *child;
    child = (MultiTermDocsChild*)term_docs->child;

    if (child->current == NULL)
        return &PL_sv_undef;

    return child->current->get_positions(child->current);
}


/* Delegate a bulk read to the current sub TermDocs, moving on to the next
 * one whenever the current one yields nothing.  The string buffer of
 * [doc_nums_sv] is treated as an array of U32, and the current base is
 * added to each of the doc numbers that were read.
 *
 * Returns the number of docs read, or 0 once every sub TermDocs has been
 * used up.
 */
U32
Kino1_MultiTermDocs_bulk_read(TermDocs *term_docs, SV *doc_nums_sv,
                              SV *freqs_sv, U32 num_wanted) {
    MultiTermDocsChild *child;
    U32                 i, num_got, base;
    U32                *doc_nums;

    child = (MultiTermDocsChild*)term_docs->child;

    while (1) {
        /* move to the next SegTermDocs */
        while (child->current == NULL) {
            if (child->pointer < child->num_subs) {
                child->base    = child->starts[ child->pointer ];
                child->current = child->sub_term_docs[ child->pointer ];
                child->pointer++;
            }
            else {
                return 0;
            }
        }

        num_got = child->current->bulk_read(
            child->current, doc_nums_sv, freqs_sv, num_wanted );

        if (num_got == 0) {
            /* no more docs left in this segment */
            child->current = NULL;
        }
        else {
            /* add the start offset for this seg to each doc */
            base     = child->base;
            doc_nums = (U32*)SvPVX(doc_nums_sv);
            for (i = 0; i < num_got; i++) {
                *doc_nums++ += base;
            }

            return num_got;
        }
    }
}

/* Advance to the next doc, moving on to the next sub TermDocs each time
 * the current one fails to advance.  Returns true if a doc was found,
 * false once every sub TermDocs has been tried.
 *
 * Implemented as a loop rather than by re-entering through the method
 * pointer, so stack depth does not grow with the number of subs.
 */
bool
Kino1_MultiTermDocs_next(TermDocs* term_docs) {
    MultiTermDocsChild *child = (MultiTermDocsChild*)term_docs->child;

    while (1) {
        if (   child->current != NULL
            && child->current->next(child->current)
        ) {
            return 1;
        }

        if (child->pointer >= child->num_subs) {
            /* done with all segments */
            return 0;
        }

        /* try next segment */
        child->base    = child->starts[ child->pointer ];
        child->current = child->sub_term_docs[ child->pointer ];
        child->pointer++;
    }
}

/* Ask the current sub TermDocs to skip to [target], expressed relative to
 * the current base, moving on to the next sub TermDocs each time the
 * current one fails.  Returns TRUE on success, FALSE once every sub
 * TermDocs has been tried.
 */
bool
Kino1_MultiTermDocs_skip_to(TermDocs *term_docs, U32 target) {
    MultiTermDocsChild *child = (MultiTermDocsChild*)term_docs->child;

    while (1) {
        if (child->current != NULL) {
            /* Doc numbers are unsigned.  When the target lies before the
             * start of this sub, (target - base) would wrap around to a
             * huge value, so clamp the relative target to 0 instead. */
            const U32 base       = (U32)child->base;
            const U32 sub_target = target > base ? target - base : 0;

            if (child->current->skip_to(child->current, sub_target))
                return TRUE;
        }

        if (child->pointer >= child->num_subs)
            return FALSE;

        /* try next segment */
        child->base    = child->starts[ child->pointer ];
        child->current = child->sub_term_docs[ child->pointer ];
        child->pointer++;
    }
}

/* Release the copied array reference and the arrays owned by the child,
 * free the child itself, then invoke the parent class destructor.
 */
void
Kino1_MultiTermDocs_destroy(TermDocs* term_docs) {
    MultiTermDocsChild *child;
    child = (MultiTermDocsChild*)term_docs->child;

    SvREFCNT_dec(child->sub_term_docs_avref);
    Kino1_Safefree(child->sub_term_docs);
    Kino1_Safefree(child->starts);
    Kino1_Safefree(child);

    Kino1_TermDocs_destroy(term_docs);
}

__POD__

==begin devdocs

==head1 NAME

KinoSearch1::Index::MultiTermDocs - multi-segment TermDocs

==head1 DESCRIPTION

Multi-segment implementation of KinoSearch1::Index::TermDocs.

==head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

==head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.01.

==end devdocs
==cut
