package KinoSearch1::Search::HitCollector;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Util::CClass );

# Base class.  Everything apart from the pragmas/includes above lives in the
# XS and C sections further down: the constructor, collect(), the storage /
# counter / filter accessors and DESTROY.

package KinoSearch1::Search::HitQueueCollector;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor args
        size => undef,
    );
}
our %instance_vars;

use KinoSearch1::Search::HitQueue;

# Constructor.
#
#   my $collector
#       = KinoSearch1::Search::HitQueueCollector->new( size => $n );
#
# 'size' is required and becomes the max_size of the HitQueue that is
# installed as this collector's storage.  Dies if the arguments fail
# verify_args() or if 'size' is undefined.
sub new {
    my $either = shift;
    my $self   = $either->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = @_;
    croak("Required parameter: 'size'") unless defined $args{size};

    my $hit_queue
        = KinoSearch1::Search::HitQueue->new( max_size => $args{size} );
    $self->_set_storage($hit_queue);

    # Point the C-level collect callback at the HitQueue implementation.
    $self->_define_collect;

    return $self;
}

# The base class's generic counter and storage slots, under names that say
# what they hold for this subclass.
*get_total_hits = *KinoSearch1::Search::HitCollector::get_i;
*get_hit_queue  = *KinoSearch1::Search::HitCollector::get_storage;

# Return the max_size reported by the underlying HitQueue.
sub get_max_size {
    my $self = shift;
    return $self->get_hit_queue->get_max_size;
}

package KinoSearch1::Search::BitCollector;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params
        capacity => 0,
    );
}
our %instance_vars;

use KinoSearch1::Util::BitVector;

# Constructor.
#
#   my $collector
#       = KinoSearch1::Search::BitCollector->new( capacity => $n );
#
# 'capacity' is optional (the default comes from %instance_vars) and is
# handed to the BitVector that is installed as this collector's storage.
# Dies if the arguments fail verify_args().
sub new {
    my $either = shift;
    my $self   = $either->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );

    # Caller-supplied values override the defaults.
    my %args = ( %instance_vars, @_ );

    my $bit_vec
        = KinoSearch1::Util::BitVector->new( capacity => $args{capacity} );
    $self->_set_storage($bit_vec);

    # Point the C-level collect callback at the BitVector implementation.
    $self->_define_collect;

    return $self;
}

# The base class's generic storage slot, under a descriptive name.
*get_bit_vector = *KinoSearch1::Search::HitCollector::get_storage;

package KinoSearch1::Search::FilteredCollector;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        hit_collector => undef,
        filter_bits   => undef,
    );
}
our %instance_vars;

# Constructor.
#
#   my $collector = KinoSearch1::Search::FilteredCollector->new(
#       hit_collector => $inner_collector,
#       filter_bits   => $bit_vector,
#   );
#
# 'hit_collector' must be a KinoSearch1::Search::HitCollector; it becomes the
# inner collector which receives the hits that pass the filter.
# 'filter_bits' is handed to the XS setter, which extracts a
# KinoSearch1::Util::BitVector from it.
sub new {
    my $either = shift;
    my $self   = $either->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = @_;
    croak("Required parameter: 'hit_collector'")
        unless a_isa_b( $args{hit_collector},
        "KinoSearch1::Search::HitCollector" );

    $self->_set_filter_bits( $args{filter_bits} );
    $self->_set_storage( $args{hit_collector} );

    # Point the C-level collect callback at the filtering implementation.
    $self->_define_collect;

    return $self;
}

package KinoSearch1::Search::OffsetCollector;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Search::HitCollector );

BEGIN {
    __PACKAGE__->init_instance_vars(
        hit_collector => undef,
        offset        => undef,
    );
}
our %instance_vars;

# Constructor.
#
#   my $collector = KinoSearch1::Search::OffsetCollector->new(
#       hit_collector => $inner_collector,
#       offset        => $doc_num_offset,
#   );
#
# 'hit_collector' must be a KinoSearch1::Search::HitCollector; it becomes the
# inner collector.  'offset' is added to every doc_num before the hit is
# forwarded to the inner collector.
sub new {
    my $either = shift;
    my $self   = $either->SUPER::new;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = @_;
    croak("Required parameter: 'hit_collector'")
        unless a_isa_b( $args{hit_collector},
        "KinoSearch1::Search::HitCollector" );

    # The XS setter numifies its argument with SvNV(), so an omitted offset
    # already ended up as 0; spell that out here instead of handing undef to
    # a numeric conversion.
    my $offset = defined $args{offset} ? $args{offset} : 0;

    $self->_set_f($offset);
    $self->_set_storage( $args{hit_collector} );

    # Point the C-level collect callback at the offsetting implementation.
    $self->_define_collect;

    return $self;
}

1;

__END__

__XS__

MODULE = KinoSearch1   PACKAGE = KinoSearch1::Search::HitCollector

=begin comment

    my $hit_collector = $class_or_object->new;

Allocate a fresh C-level HitCollector and wrap it in a blessed scalar.  The
invocant may be either a class name or an existing object, in which case the
new object is blessed into the same class.

=end comment
=cut

void
new(either_sv)
    SV *either_sv;
PREINIT:
    const char   *class;
    HitCollector *hc;
PPCODE:
    hc = Kino1_HC_new();
    /* sv_reftype() examines the referent, not the reference, and only
     * reports the blessed package when its second argument is true.
     * sv_isobject() has already established that either_sv is a reference
     * to a blessed SV, so SvRV() is safe here.
     */
    class = sv_isobject(either_sv)
        ? sv_reftype(SvRV(either_sv), 1)
        : SvPV_nolen(either_sv);
    ST(0) = sv_newmortal();
    sv_setref_pv(ST(0), class, (void*)hc);
    XSRETURN(1);

=begin comment

    $hit_collector->collect( $doc_num, $score );

Process a doc_num/score combination.  In production, this method should not be
called from Perl, as collecting hits is an extremely data-intensive operation.

=end comment
=cut

void
collect(hc, doc_num, score)
    HitCollector *hc;
    U32           doc_num;
    float         score;
PPCODE:
    /* dispatch through whichever callback the subclass installed */
    hc->collect(hc, doc_num, score);

=begin comment

Combined setter/getter, dispatched on the ALIAS index.  Each setter falls
through into the matching getter, so a setter returns the value just stored.

    _set_storage / get_storage          - hc->storage_ref (and hc->storage)
    _set_i / get_i                      - hc->i
    _set_f / _get_f                     - hc->f
    _set_filter_bits / _get_filter_bits - hc->filter_bits_ref
                                          (and hc->filter_bits)

=end comment
=cut

SV*
_set_or_get(hc, ...)
    HitCollector *hc;
ALIAS:
    _set_storage     = 1
    get_storage      = 2
    _set_i           = 3
    get_i            = 4
    _set_f           = 5
    _get_f           = 6
    _set_filter_bits = 7
    _get_filter_bits = 8
CODE:
{
    KINO_START_SET_OR_GET_SWITCH

    /* release the previous holder, keep a private copy of the new one,
     * then refresh the raw C pointer derived from it */
    case 1:  SvREFCNT_dec(hc->storage_ref);
             hc->storage_ref = newSVsv( ST(1) );
             Kino1_extract_anon_struct(hc->storage_ref, hc->storage);
             /* fall through */
    case 2:  RETVAL = newSVsv(hc->storage_ref);
             break;

    case 3:  hc->i = SvUV( ST(1) );
             /* fall through */
    case 4:  RETVAL = newSVuv(hc->i);
             break;

    case 5:  hc->f = SvNV( ST(1) );
             /* fall through */
    case 6:  RETVAL = newSVnv(hc->f);
             break;

    /* same pattern as case 1, but the value must be a BitVector */
    case 7:  SvREFCNT_dec(hc->filter_bits_ref);
             hc->filter_bits_ref = newSVsv( ST(1) );
             Kino1_extract_struct( hc->filter_bits_ref, hc->filter_bits,
                BitVector*, "KinoSearch1::Util::BitVector" );
             /* fall through */
    case 8:  RETVAL = newSVsv(hc->filter_bits_ref);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL

void
DESTROY(hc)
    HitCollector *hc;
PPCODE:
    Kino1_HC_destroy(hc);


MODULE = KinoSearch1   PACKAGE = KinoSearch1::Search::HitQueueCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    /* install the HitQueue flavour of the collect callback */
    hc->collect = Kino1_HC_collect_HitQueue;

MODULE = KinoSearch1   PACKAGE = KinoSearch1::Search::BitCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    /* install the BitVector flavour of the collect callback */
    hc->collect = Kino1_HC_collect_BitVec;

MODULE = KinoSearch1   PACKAGE = KinoSearch1::Search::FilteredCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    /* install the filtering flavour of the collect callback */
    hc->collect = Kino1_HC_collect_filtered;

MODULE = KinoSearch1   PACKAGE = KinoSearch1::Search::OffsetCollector

void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    /* install the offsetting flavour of the collect callback */
    hc->collect = Kino1_HC_collect_offset;



__H__

#ifndef H_KINO_HIT_COLLECTOR
#define H_KINO_HIT_COLLECTOR 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1UtilCarp.h"
#include "KinoSearch1UtilMathUtils.h"
#include "KinoSearch1UtilBitVector.h"
#include "KinoSearch1UtilPriorityQueue.h"
#include "KinoSearch1UtilMemManager.h"

/* One struct serves every collector flavour; what the generic slots mean
 * depends on which collect callback is installed.
 */
typedef struct hitcollector {
    /* per-hit callback, chosen by the subclass via _define_collect */
    void      (*collect)(struct hitcollector*, U32, float);
    /* HitQueue: lowest score held once the queue is full;
     * Offset: the doc_num offset */
    float       f;
    /* HitQueue/BitVec: running total of hits seen */
    U32         i;
    /* raw pointer extracted from storage_ref: a PriorityQueue, a BitVector
     * or an inner HitCollector, depending on the flavour */
    void       *storage;
    /* Perl-side scalar from which `storage` was extracted */
    SV         *storage_ref;
    /* Filtered: only docs whose bit is set are passed on */
    BitVector  *filter_bits;
    /* Perl-side scalar from which `filter_bits` was extracted */
    SV         *filter_bits_ref;
} HitCollector;

HitCollector* Kino1_HC_new();
void Kino1_HC_collect_death(HitCollector*, U32, float);
void Kino1_HC_collect_HitQueue(HitCollector*, U32, float);
void Kino1_HC_collect_BitVec(HitCollector*, U32, float);
void Kino1_HC_collect_filtered(HitCollector*, U32, float);
void Kino1_HC_collect_offset(HitCollector*, U32, float);
void Kino1_HC_destroy(HitCollector*);

#endif /* include guard */

__C__


#include "KinoSearch1SearchHitCollector.h"

/* Allocate a HitCollector with zeroed counters, no storage, no filter, and
 * a collect callback that dies until a subclass replaces it.
 */
HitCollector*
Kino1_HC_new() {
    HitCollector *hc;

    /* allocate memory and init */
    Kino1_New(0, hc, 1, HitCollector);
    hc->f               = 0;
    hc->i               = 0;
    hc->storage         = NULL;
    hc->storage_ref     = &PL_sv_undef;
    hc->filter_bits     = NULL;
    hc->filter_bits_ref = &PL_sv_undef;

    /* force the subclass to spec a collect method */
    hc->collect = Kino1_HC_collect_death;

    return hc;
}

/* Default callback: calling collect on a collector whose subclass never
 * installed a real callback is a programming error.
 */
void
Kino1_HC_collect_death(HitCollector *hc, U32 doc_num, float score) {
    Kino1_confess("hit_collector->collect must be assigned in a subclass");
}


/* Count the hit and offer it to the priority queue held in hc->storage.
 * Once the queue is full, hc->f caches the score at the front of the queue
 * so that lower-scoring hits can be rejected without building an SV.
 */
void
Kino1_HC_collect_HitQueue(HitCollector *hc, U32 doc_num, float score) {
    /* add to the total number of hits */
    hc->i++;

    /* bail if the score is below the current minimum */
    if (score < hc->f) {
        return;
    }
    else {
        SV            *element;
        char           doc_num_buf[4];
        PriorityQueue *hit_queue;
        hit_queue = (PriorityQueue*)hc->storage;

        /* put a dualvar scalar -- encoded doc_num in PV, score in NV */
        element = sv_newmortal();
        (void)SvUPGRADE(element, SVt_PVNV);
        /* the array decays to a pointer to its first byte */
        Kino1_encode_bigend_U32(doc_num, doc_num_buf);
        sv_setpvn(element, doc_num_buf, (STRLEN)4);
        SvNV_set(element, (double)score);
        SvNOK_on(element);
        (void)Kino1_PriQ_insert(hit_queue, element);

        /* store the bubble score in a more accessible spot */
        if (hit_queue->size == hit_queue->max_size) {
            SV *least_sv;
            least_sv = Kino1_PriQ_peek(hit_queue);
            hc->f    = SvNV(least_sv);
        }
    }
}

/* Count the hit and set the bit for doc_num in the BitVector held in
 * hc->storage.  The score is ignored.
 */
void
Kino1_HC_collect_BitVec(HitCollector *hc, U32 doc_num, float score) {
    BitVector *bit_vec;
    bit_vec = (BitVector*)hc->storage;

    /* add to the total number of hits */
    hc->i++;

    /* add the doc_num to the BitVector */
    Kino1_BitVec_set(bit_vec, doc_num);
}

/* Forward the hit to the inner collector held in hc->storage, but only if
 * the bit for doc_num is set in hc->filter_bits.
 */
void
Kino1_HC_collect_filtered(HitCollector *hc, U32 doc_num, float score) {
    if (hc->filter_bits == NULL) {
        Kino1_confess("filter_bits not set on FilteredCollector");
    }

    if (Kino1_BitVec_get(hc->filter_bits, doc_num)) {
        HitCollector *inner_collector;
        inner_collector = (HitCollector*)hc->storage;
        inner_collector->collect(inner_collector, doc_num, score);
    }
}

/* Forward the hit to the inner collector held in hc->storage after adding
 * the offset stored in hc->f to doc_num.
 */
void
Kino1_HC_collect_offset(HitCollector *hc, U32 doc_num, float score) {
    HitCollector *inner_collector = (HitCollector*)hc->storage;
    /* Do the sum in double precision.  Adding a U32 to a float converts the
     * U32 to float, whose 24-bit mantissa rounds doc numbers above 2**24;
     * a double represents every U32 exactly.
     * NOTE(review): the offset itself is still stored in a float, so an
     * offset above 2**24 has already been rounded by _set_f.
     */
    U32 offset_doc_num = (U32)((double)doc_num + (double)hc->f);
    inner_collector->collect(inner_collector, offset_doc_num, score);
}


/* Drop the references held on the Perl-side scalars and free the struct. */
void
Kino1_HC_destroy(HitCollector *hc) {
    SvREFCNT_dec(hc->storage_ref);
    SvREFCNT_dec(hc->filter_bits_ref);
    Kino1_Safefree(hc);
}

__POD__

==begin devdocs

==head1 NAME

KinoSearch1::Search::HitCollector - process doc/score pairs

==head1 DESCRIPTION

A Scorer spits out raw doc_num/score pairs; a HitCollector decides what to do
with them, based on the hc->collect method.

A HitQueueCollector keeps the highest scoring N documents and their associated
scores in a HitQueue while iterating through a large list.

A BitCollector builds a BitVector with a set bit for each doc number (scores
are irrelevant).

A FilteredCollector wraps another HitCollector, only allowing the inner
collector to "see" doc_num/score pairs which make it through the filter.

An OffsetCollector wraps another HitCollector, adding a fixed offset to each
doc_num before passing the doc_num/score pair on to the inner collector.

==head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

==head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.01.

==end devdocs
==cut
