1package KinoSearch1::Search::HitCollector;
2use strict;
3use warnings;
4use KinoSearch1::Util::ToolSet;
5use base qw( KinoSearch1::Util::CClass );
6
7# all xs, other than the pragmas/includes
8
9package KinoSearch1::Search::HitQueueCollector;
10use strict;
11use warnings;
12use KinoSearch1::Util::ToolSet;
13use base qw( KinoSearch1::Search::HitCollector );
14
# Declare the constructor parameters for this class at compile time.  new()
# later validates its arguments against %instance_vars via verify_args().
# NOTE(review): init_instance_vars() is defined elsewhere; presumably it is
# what populates %instance_vars -- confirm in KinoSearch1::Util::Class.
BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor args
        size => undef,    # no usable default: new() croaks when it is undef
    );
}
our %instance_vars;
22
23use KinoSearch1::Search::HitQueue;
24
# Constructor.  Takes a single labeled parameter:
#
#     size => maximum number of hits to retain (required)
#
# Builds a HitQueue of that capacity, stores it as the collector's storage,
# and installs the HitQueue-specific collect callback.  Confesses on
# arguments rejected by verify_args(); croaks when 'size' is missing.
sub new {
    my $invocant = shift;
    my $self     = $invocant->SUPER::new;

    verify_args( \%instance_vars, @_ ) or confess kerror();
    my %args = @_;
    if ( !defined $args{size} ) {
        croak("Required parameter: 'size'");
    }

    # the queue retains at most 'size' hits
    my $queue = KinoSearch1::Search::HitQueue->new( max_size => $args{size} );
    $self->_set_storage($queue);
    $self->_define_collect;

    return $self;
}
38
# Public accessor names for this subclass, aliased onto the generic XS
# accessors of HitCollector: the hit count lives in the 'i' slot and the
# HitQueue in the 'storage' slot.
*get_total_hits = *KinoSearch1::Search::HitCollector::get_i;
*get_hit_queue  = *KinoSearch1::Search::HitCollector::get_storage;

# Return the capacity of the underlying HitQueue.
sub get_max_size {
    my $self = shift;
    return $self->get_hit_queue->get_max_size;
}
45
46package KinoSearch1::Search::BitCollector;
47use strict;
48use warnings;
49use KinoSearch1::Util::ToolSet;
50use base qw( KinoSearch1::Search::HitCollector );
51
# Declare the constructor parameters for this class at compile time.  new()
# merges %instance_vars with the caller's arguments, so the value given here
# acts as the default.
# NOTE(review): init_instance_vars() is defined elsewhere; presumably it is
# what populates %instance_vars -- confirm.
BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params
        capacity => 0,    # passed to BitVector->new as its capacity
    );
}
our %instance_vars;
59
60use KinoSearch1::Util::BitVector;
61
# Constructor.  Accepts one optional labeled parameter:
#
#     capacity => capacity handed to the BitVector (default taken from
#                 %instance_vars)
#
# Creates a BitVector, stores it as the collector's storage, and installs
# the BitVector-specific collect callback.  Confesses on arguments rejected
# by verify_args().
sub new {
    my $invocant = shift;
    my $self     = $invocant->SUPER::new;

    verify_args( \%instance_vars, @_ ) or confess kerror();

    # caller-supplied values override the class defaults
    my %args = ( %instance_vars, @_ );

    my $bits = KinoSearch1::Util::BitVector->new( capacity => $args{capacity} );
    $self->_set_storage($bits);
    $self->_define_collect;

    return $self;
}
74
# Public name for the generic XS 'storage' getter: for a BitCollector the
# storage slot holds the BitVector set up in new().
*get_bit_vector = *KinoSearch1::Search::HitCollector::get_storage;
76
77package KinoSearch1::Search::FilteredCollector;
78use strict;
79use warnings;
80use KinoSearch1::Util::ToolSet;
81use base qw( KinoSearch1::Search::HitCollector );
82
# Declare the constructor parameters for this class at compile time.  new()
# validates its arguments against %instance_vars via verify_args().
# NOTE(review): init_instance_vars() is defined elsewhere; presumably it is
# what populates %instance_vars -- confirm.
BEGIN {
    __PACKAGE__->init_instance_vars(
        hit_collector => undef,    # inner collector; must be a HitCollector
        filter_bits   => undef,    # handed to _set_filter_bits() in new()
    );
}
our %instance_vars;
90
# Constructor.  Labeled parameters:
#
#     hit_collector => the inner HitCollector to forward to (required)
#     filter_bits   => passed to _set_filter_bits(), which extracts a
#                      KinoSearch1::Util::BitVector from it
#
# Stores the filter and the inner collector, then installs the filtering
# collect callback.  Confesses on arguments rejected by verify_args();
# croaks when 'hit_collector' is not a HitCollector.
sub new {
    my $invocant = shift;
    my $self     = $invocant->SUPER::new;

    verify_args( \%instance_vars, @_ ) or confess kerror();
    my %args = @_;

    my $inner_ok = a_isa_b( $args{hit_collector},
        "KinoSearch1::Search::HitCollector" );
    if ( !$inner_ok ) {
        croak("Required parameter: 'hit_collector'");
    }

    $self->_set_filter_bits( $args{filter_bits} );
    $self->_set_storage( $args{hit_collector} );
    $self->_define_collect;

    return $self;
}
105
106package KinoSearch1::Search::OffsetCollector;
107use strict;
108use warnings;
109use KinoSearch1::Util::ToolSet;
110use base qw( KinoSearch1::Search::HitCollector );
111
# Declare the constructor parameters for this class at compile time.  new()
# validates its arguments against %instance_vars via verify_args().
# NOTE(review): init_instance_vars() is defined elsewhere; presumably it is
# what populates %instance_vars -- confirm.
BEGIN {
    __PACKAGE__->init_instance_vars(
        hit_collector => undef,    # inner collector; must be a HitCollector
        offset        => undef,    # stored in the 'f' slot by new()
    );
}
our %instance_vars;
119
# Constructor.  Labeled parameters:
#
#     hit_collector => the inner HitCollector to forward to (required)
#     offset        => amount added to each doc number before forwarding
#
# The offset is kept in the generic 'f' slot and the inner collector in the
# storage slot; the offsetting collect callback is then installed.
# Confesses on arguments rejected by verify_args(); croaks when
# 'hit_collector' is not a HitCollector.
sub new {
    my $invocant = shift;
    my $self     = $invocant->SUPER::new;

    verify_args( \%instance_vars, @_ ) or confess kerror();
    my %args = @_;

    my $inner_ok = a_isa_b( $args{hit_collector},
        "KinoSearch1::Search::HitCollector" );
    if ( !$inner_ok ) {
        croak("Required parameter: 'hit_collector'");
    }

    $self->_set_f( $args{offset} );
    $self->_set_storage( $args{hit_collector} );
    $self->_define_collect;

    return $self;
}
134
1351;
136
137__END__
138
139__XS__
140
141MODULE = KinoSearch1    PACKAGE = KinoSearch1::Search::HitCollector
142
143void
144new(either_sv)
145    SV *either_sv;
146PREINIT:
147    const char   *class;
148    HitCollector *hc;
149PPCODE:
150    hc    = Kino1_HC_new();
151    class = sv_isobject(either_sv)
152        ? sv_reftype(either_sv, 0)
153        : SvPV_nolen(either_sv);
154    ST(0) = sv_newmortal();
155    sv_setref_pv(ST(0), class, (void*)hc);
156    XSRETURN(1);
157
158=begin comment
159
160    $hit_collector->collect( $doc_num, $score );
161
162Process a doc_num/score combination.  In production, this method should not be
163called from Perl, as collecting hits is an extremely data-intensive operation.
164
165=end comment
166=cut
167
void
collect(hc, doc_num, score)
    HitCollector *hc;
    U32           doc_num;
    float         score;
PPCODE:
    /* Dispatch through the function pointer stored in the struct; each
     * subclass installs its own callback via _define_collect(). */
    hc->collect(hc, doc_num, score);
175
SV*
_set_or_get(hc, ...)
    HitCollector *hc;
ALIAS:
    _set_storage     = 1
    get_storage      = 2
    _set_i           = 3
    get_i            = 4
    _set_f           = 5
    _get_f           = 6
    _set_filter_bits = 7
    _get_filter_bits = 8
CODE:
{
    /* One XSUB backs every accessor listed under ALIAS.  Odd alias numbers
     * are setters, even numbers the matching getters; each setter falls
     * through into its getter so that it returns the value just stored.
     * NOTE(review): the switch itself is opened and closed by the
     * KINO_*_SET_OR_GET_SWITCH macros, which are defined elsewhere --
     * presumably they switch on ix and check the argument count; confirm.
     */
    KINO_START_SET_OR_GET_SWITCH

    /* storage: release the previous wrapper SV, keep a copy of the new
     * one, and cache the struct pointer extracted from it */
    case 1:  SvREFCNT_dec(hc->storage_ref);
             hc->storage_ref = newSVsv( ST(1) );
             Kino1_extract_anon_struct(hc->storage_ref, hc->storage);
             /* fall through */
    case 2:  RETVAL = newSVsv(hc->storage_ref);
             break;

    /* i: unsigned integer slot */
    case 3:  hc->i = SvUV( ST(1) );
             /* fall through */
    case 4:  RETVAL = newSVuv(hc->i);
             break;

    /* f: floating point slot */
    case 5:  hc->f = SvNV( ST(1) );
             /* fall through */
    case 6:  RETVAL = newSVnv(hc->f);
             break;

    /* filter_bits: same scheme as storage, but the extraction names
     * KinoSearch1::Util::BitVector as the expected class */
    case 7:  SvREFCNT_dec(hc->filter_bits_ref);
             hc->filter_bits_ref = newSVsv( ST(1) );
             Kino1_extract_struct( hc->filter_bits_ref, hc->filter_bits,
                BitVector*, "KinoSearch1::Util::BitVector" );
             /* fall through */
    case 8:  RETVAL = newSVsv(hc->filter_bits_ref);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL
220
void
DESTROY(hc)
    HitCollector *hc;
PPCODE:
    /* Release the SVs held by the struct and free the struct itself. */
    Kino1_HC_destroy(hc);
226
227
228MODULE = KinoSearch1    PACKAGE = KinoSearch1::Search::HitQueueCollector
229
void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    /* Install the callback that feeds hits into a HitQueue. */
    hc->collect = Kino1_HC_collect_HitQueue;
235
236MODULE = KinoSearch1    PACKAGE = KinoSearch1::Search::BitCollector
237
void
_define_collect(hc)
    HitCollector *hc;
PPCODE:
    /* Install the callback that records doc numbers in a BitVector. */
    hc->collect = Kino1_HC_collect_BitVec;
243
244MODULE = KinoSearch1    PACKAGE = KinoSearch1::Search::FilteredCollector
245
246void
247_define_collect(hc);
248    HitCollector *hc;
249PPCODE:
250    hc->collect = Kino1_HC_collect_filtered;
251
252MODULE = KinoSearch1    PACKAGE = KinoSearch1::Search::OffsetCollector
253
254void
255_define_collect(hc);
256    HitCollector *hc;
257PPCODE:
258    hc->collect = Kino1_HC_collect_offset;
259
260
261
262__H__
263
264#ifndef H_KINO_HIT_COLLECTOR
265#define H_KINO_HIT_COLLECTOR 1
266
267#include "EXTERN.h"
268#include "perl.h"
269#include "XSUB.h"
270#include "KinoSearch1UtilCarp.h"
271#include "KinoSearch1UtilMathUtils.h"
272#include "KinoSearch1UtilBitVector.h"
273#include "KinoSearch1UtilPriorityQueue.h"
274#include "KinoSearch1UtilMemManager.h"
275
/* State shared by every HitCollector variant.  The meaning of the generic
 * slots depends on which collect callback is installed.
 */
typedef struct hitcollector {
    /* per-variant callback; Kino1_HC_new() points it at
     * Kino1_HC_collect_death until a subclass replaces it */
    void      (*collect)(struct hitcollector*, U32, float);
    /* HitQueue variant: lowest score worth keeping; offset variant: the
     * amount added to each doc number */
    float       f;
    /* running hit count (HitQueue and BitVec variants) */
    U32         i;
    /* struct extracted from storage_ref: a PriorityQueue, a BitVector or
     * an inner HitCollector, depending on the variant */
    void       *storage;
    /* Perl-side SV for storage; released in Kino1_HC_destroy() */
    SV         *storage_ref;
    /* filter consulted by the filtered variant, extracted from
     * filter_bits_ref */
    BitVector  *filter_bits;
    /* Perl-side SV for filter_bits; released in Kino1_HC_destroy() */
    SV         *filter_bits_ref;
} HitCollector;
285
/* Allocate a collector with zeroed slots and a "death" collect callback. */
HitCollector* Kino1_HC_new();
/* Placeholder callback: confesses because no subclass installed a real one. */
void Kino1_HC_collect_death(HitCollector*, U32, float);
/* Count the hit and offer it to the PriorityQueue held in storage. */
void Kino1_HC_collect_HitQueue(HitCollector*, U32, float);
/* Count the hit and set the doc number's bit in the BitVector in storage. */
void Kino1_HC_collect_BitVec(HitCollector*, U32, float);
/* Forward to the inner collector only if the doc number passes filter_bits. */
void Kino1_HC_collect_filtered(HitCollector*, U32, float);
/* Forward to the inner collector with the stored offset added to doc_num. */
void Kino1_HC_collect_offset(HitCollector*, U32, float);
/* Release the held SVs and free the struct. */
void Kino1_HC_destroy(HitCollector*);
293
294#endif /* include guard */
295
296__C__
297
298
299#include "KinoSearch1SearchHitCollector.h"
300
301HitCollector*
302Kino1_HC_new() {
303    HitCollector  *hc;
304
305    /* allocate memory and init */
306    Kino1_New(0, hc, 1, HitCollector);
307    hc->f               = 0;
308    hc->i               = 0;
309    hc->storage         = NULL;
310    hc->storage_ref     = &PL_sv_undef;
311    hc->filter_bits     = NULL;
312    hc->filter_bits_ref = &PL_sv_undef;
313
314    /* force the subclass to spec a collect method */
315    hc->collect = Kino1_HC_collect_death;
316
317    return hc;
318}
319
/* Default collect callback installed by Kino1_HC_new().  It ignores its
 * arguments and confesses, so a collector whose subclass never called
 * _define_collect() fails loudly on first use.
 */
void
Kino1_HC_collect_death(HitCollector *hc, U32 doc_num, float score) {
    Kino1_confess("hit_collector->collect must be assigned in a subclass");
}
324
325
326void
327Kino1_HC_collect_HitQueue(HitCollector *hc, U32 doc_num, float score) {
328    /* add to the total number of hits */
329    hc->i++;
330
331    /* bail if the score doesn't exceed the minimum */
332    if (score < hc->f) {
333        return;
334    }
335    else {
336        SV *element;
337        char doc_num_buf[4];
338        PriorityQueue *hit_queue;
339        hit_queue = (PriorityQueue*)hc->storage;
340
341        /* put a dualvar scalar -- encoded doc_num in PV, score in NV */
342        element = sv_newmortal();
343        (void)SvUPGRADE(element, SVt_PVNV);
344        Kino1_encode_bigend_U32(doc_num, &doc_num_buf);
345        sv_setpvn(element, doc_num_buf, (STRLEN)4);
346        SvNV_set(element, (double)score);
347        SvNOK_on(element);
348        (void)Kino1_PriQ_insert(hit_queue, element);
349
350        /* store the bubble score in a more accessible spot */
351        if (hit_queue->size == hit_queue->max_size) {
352            SV *least_sv;
353            least_sv = Kino1_PriQ_peek(hit_queue);
354            hc->f    = SvNV(least_sv);
355        }
356    }
357}
358
359void
360Kino1_HC_collect_BitVec(HitCollector *hc, U32 doc_num, float score) {
361    BitVector *bit_vec;
362    bit_vec = (BitVector*)hc->storage;
363
364    /* add to the total number of hits */
365    hc->i++;
366
367    /* add the doc_num to the BitVector */
368    Kino1_BitVec_set(bit_vec, doc_num);
369}
370
371void
372Kino1_HC_collect_filtered(HitCollector *hc, U32 doc_num, float score) {
373    if (hc->filter_bits == NULL) {
374        Kino1_confess("filter_bits not set on FilteredCollector");
375    }
376
377    if (Kino1_BitVec_get(hc->filter_bits, doc_num)) {
378        HitCollector *inner_collector;
379        inner_collector = (HitCollector*)hc->storage;
380        inner_collector->collect(inner_collector, doc_num, score);
381    }
382}
383
384void
385Kino1_HC_collect_offset(HitCollector *hc, U32 doc_num, float score) {
386    HitCollector *inner_collector = (HitCollector*)hc->storage;
387    U32 offset_doc_num = doc_num + hc->f;
388    inner_collector->collect(inner_collector, offset_doc_num, score);
389}
390
391
/* Tear down a HitCollector: drop the references held on the storage and
 * filter SVs, then free the struct.  The raw storage/filter_bits pointers
 * are not freed here; only the SVs they were extracted from are released.
 */
void
Kino1_HC_destroy(HitCollector *hc) {
    SvREFCNT_dec(hc->storage_ref);
    SvREFCNT_dec(hc->filter_bits_ref);
    Kino1_Safefree(hc);
}
398
399__POD__
400
401==begin devdocs
402
403==head1 NAME
404
405KinoSearch1::Search::HitCollector - process doc/score pairs
406
407==head1 DESCRIPTION
408
409A Scorer spits out raw doc_num/score pairs; a HitCollector decides what to do
410with them, based on the hc->collect method.
411
412A HitQueueCollector keeps the highest scoring N documents and their associated
413scores in a HitQueue while iterating through a large list.
414
415A BitCollector builds a BitVector with a set bit for each doc number (scores
416are irrelevant).
417
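A FilteredCollector wraps another HitCollector, only allowing the inner
collector to "see" doc_num/score pairs which make it through the filter.

An OffsetCollector wraps another HitCollector, adding a fixed offset to each
doc number before passing the doc_num/score pair on to the inner collector.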
420
421==head1 COPYRIGHT
422
423Copyright 2005-2010 Marvin Humphrey
424
425==head1 LICENSE, DISCLAIMER, BUGS, etc.
426
427See L<KinoSearch1> version 1.01.
428
429==end devdocs
430==cut
431
432
433