1package KinoSearch1::Index::MultiTermDocs;
2use strict;
3use warnings;
4use KinoSearch1::Util::ToolSet;
5use base qw( KinoSearch1::Index::TermDocs );
6
# Runs at compile time: hands the names of the constructor's named
# parameters (each defaulting to undef) to init_instance_vars().
BEGIN {
    __PACKAGE__->init_instance_vars(
        'sub_readers', undef,
        'starts',      undef,
    );
}

# Package hash that new() consults for argument checking and for defaults.
# NOTE(review): presumably populated by init_instance_vars() above -- confirm.
our %instance_vars;
14
# Constructor.
#
# Named parameters (both optional, each defaulting to an empty list):
#   sub_readers - array ref of objects that respond to term_docs(), one
#                 sub-TermDocs is requested from each of them
#   starts      - array ref of doc number offsets, handed to _init_child()
#                 together with the collected sub-TermDocs
#
# Confesses when verify_args() rejects the supplied arguments.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new;

    verify_args( \%instance_vars, @_ ) or confess kerror();
    my %args = ( %instance_vars, @_ );

    my $starts = $args{starts} || [];

    # ask every sub-reader for its own TermDocs
    my @sub_term_docs;
    for my $sub_reader ( @{ $args{sub_readers} || [] } ) {
        push @sub_term_docs, $sub_reader->term_docs;
    }

    # the C-level state is built from the collected TermDocs and offsets
    _init_child( $self, \@sub_term_docs, $starts );

    return $self;
}
28
# Seek every sub-TermDocs to $term, then rewind the multi-segment cursor
# so that iteration starts over at the first sub-TermDocs.
sub seek {
    my ( $self, $term ) = @_;

    for my $sub_td ( @{ $self->_get_sub_term_docs } ) {
        $sub_td->seek($term);
    }

    return $self->_reset_pointer;
}
34
# Forward the read_positions setting to each sub-TermDocs.
sub set_read_positions {
    my ( $self, $val ) = @_;

    for my $sub_td ( @{ $self->_get_sub_term_docs } ) {
        $sub_td->set_read_positions($val);
    }
}
39
# Close each sub-TermDocs in turn.
sub close {
    my ($self) = @_;

    for my $sub_td ( @{ $self->_get_sub_term_docs } ) {
        $sub_td->close;
    }
}
44
451;
46
47__END__
48
49__XS__
50
51MODULE = KinoSearch1    PACKAGE = KinoSearch1::Index::MultiTermDocs
52
void
_init_child(term_docs, sub_term_docs_avref, starts_av)
    TermDocs *term_docs;
    SV       *sub_term_docs_avref;
    AV       *starts_av;
PPCODE:
    /* Thin wrapper: all of the work happens in the C function, and
     * nothing is pushed onto the stack, so Perl sees an empty list. */
    Kino1_MultiTermDocs_init_child(
        term_docs, sub_term_docs_avref, starts_av
    );
60
61
62=for comment
63Helper for seek().
64
65=cut
66
void
_reset_pointer(term_docs)
    TermDocs *term_docs;
PREINIT:
    MultiTermDocsChild *child;
PPCODE:
    /* Forget the sub-TermDocs in use and rewind to the first one; the
     * iteration functions select a sub-TermDocs again when current is
     * NULL. */
    child          = (MultiTermDocsChild*)term_docs->child;
    child->current = NULL;
    child->pointer = 0;
    child->base    = 0;
77
78
SV*
_set_or_get(term_docs, ...)
    TermDocs *term_docs;
ALIAS:
    _set_sub_term_docs = 1
    _get_sub_term_docs = 2
CODE:
{
    /* Shared body for the aliased accessors above.
     * NOTE(review): the START/END macros presumably open and close a
     * switch on the alias index -- confirm against their definition. */
    MultiTermDocsChild *child = (MultiTermDocsChild*)term_docs->child;

    KINO_START_SET_OR_GET_SWITCH

    /* the setter is refused: sub_term_docs is fixed at construction */
    case 1:  Kino1_confess("Can't set sub_term_docs");
             /* fall through */
    /* the getter returns a new copy of the stored array reference */
    case 2:  RETVAL = newSVsv( child->sub_term_docs_avref );
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL
99
100__H__
101
102#ifndef H_KINO_MULTI_TERM_DOCS
103#define H_KINO_MULTI_TERM_DOCS 1
104
105#include "EXTERN.h"
106#include "perl.h"
107#include "XSUB.h"
108#include "KinoSearch1IndexTermDocs.h"
109#include "KinoSearch1UtilCClass.h"
110#include "KinoSearch1UtilMemManager.h"
111
112typedef struct multitermdocschild {
113    I32        num_subs;
114    I32        base;
115    I32        pointer;
116    SV        *sub_term_docs_avref;
117    U32       *starts;
118    SV        *term_sv;
119    TermDocs **sub_term_docs;
120    TermDocs  *current;
121} MultiTermDocsChild;
122
123void Kino1_MultiTermDocs_init_child(TermDocs*, SV*, AV*);
124void Kino1_MultiTermDocs_set_doc_freq_death(TermDocs*, U32);
125U32  Kino1_MultiTermDocs_get_doc_freq(TermDocs*);
126U32  Kino1_MultiTermDocs_get_doc(TermDocs*);
127U32  Kino1_MultiTermDocs_get_freq(TermDocs*);
128SV*  Kino1_MultiTermDocs_get_positions(TermDocs*);
129U32  Kino1_MultiTermDocs_bulk_read(TermDocs*, SV*, SV*, U32);
130bool Kino1_MultiTermDocs_next(TermDocs*);
131bool Kino1_MultiTermDocs_skip_to(TermDocs*, U32);
132void Kino1_MultiTermDocs_destroy(TermDocs*);
133
134#endif /* include guard */
135
136__C__
137
138#include "KinoSearch1IndexMultiTermDocs.h"
139
140void
141Kino1_MultiTermDocs_init_child(TermDocs* term_docs, SV *sub_term_docs_avref,
142                              AV *starts_av) {
143    MultiTermDocsChild *child;
144    I32                 i;
145    SV                **sv_ptr;
146    AV                 *sub_term_docs_av;
147
148    /* allocate */
149    Kino1_New(0, child, 1, MultiTermDocsChild);
150    term_docs->child = child;
151
152    /* assign */
153    child->current = NULL;
154    child->base    = 0;
155    child->pointer = 0;
156
157    /* extract AV* and take stock of how many sub-TermDocs we've got */
158    child->sub_term_docs_avref = newSVsv(sub_term_docs_avref);;
159    sub_term_docs_av = (AV*)SvRV(sub_term_docs_avref);
160    child->num_subs = av_len(sub_term_docs_av) + 1;
161
162    /* extract starts from starts array, subTermDocs from the subs array */
163    Kino1_New(0, child->starts, child->num_subs, U32);
164    Kino1_New(0, child->sub_term_docs, child->num_subs, TermDocs*);
165    for (i = 0; i < child->num_subs; i++) {
166        sv_ptr = av_fetch(starts_av, i, 0);
167        if (sv_ptr == NULL)
168            Kino1_confess("starts array doesn't have enough valid members");
169        child->starts[i] = (U32)SvUV(*sv_ptr);
170        sv_ptr = av_fetch(sub_term_docs_av, i, 0);
171        if (sv_ptr == NULL)
172            Kino1_confess("TermDocs array doesn't have enough valid members");
173        Kino1_extract_struct(*sv_ptr, child->sub_term_docs[i], TermDocs*,
174            "KinoSearch1::Index::TermDocs");
175    }
176
177    /* assign method pointers */
178    term_docs->set_doc_freq  = Kino1_MultiTermDocs_set_doc_freq_death;
179    term_docs->get_doc_freq  = Kino1_MultiTermDocs_get_doc_freq;
180    term_docs->get_doc       = Kino1_MultiTermDocs_get_doc;
181    term_docs->get_freq      = Kino1_MultiTermDocs_get_freq;
182    term_docs->get_positions = Kino1_MultiTermDocs_get_positions;
183    term_docs->bulk_read     = Kino1_MultiTermDocs_bulk_read;
184    term_docs->next          = Kino1_MultiTermDocs_next;
185    term_docs->skip_to       = Kino1_MultiTermDocs_skip_to;
186    term_docs->destroy       = Kino1_MultiTermDocs_destroy;
187}
188
189void
190Kino1_MultiTermDocs_set_doc_freq_death(TermDocs *term_docs, U32 doc_freq) {
191    Kino1_confess("can't set doc_freq on a MultiTermDocs");
192}
193
194U32
195Kino1_MultiTermDocs_get_doc_freq(TermDocs *term_docs) {
196    MultiTermDocsChild *child;
197    TermDocs           *sub_td;
198    I32                 i;
199    U32                 doc_freq = 0;
200
201    /* sum the doc_freqs of all segments */
202    child = (MultiTermDocsChild*)term_docs->child;
203    for (i = 0; i < child->num_subs; i++) {
204        sub_td = child->sub_term_docs[i];
205        doc_freq += sub_td->get_doc_freq(sub_td);
206    }
207    return doc_freq;
208}
209
210U32
211Kino1_MultiTermDocs_get_doc(TermDocs *term_docs) {
212    MultiTermDocsChild *child;
213    child = (MultiTermDocsChild*)term_docs->child;
214
215    if (child->current == NULL)
216        return KINO_TERM_DOCS_SENTINEL;
217
218    return child->current->get_doc(child->current) + child->base;
219}
220
221U32
222Kino1_MultiTermDocs_get_freq(TermDocs *term_docs) {
223    MultiTermDocsChild *child;
224    child = (MultiTermDocsChild*)term_docs->child;
225
226    if (child->current == NULL)
227        return KINO_TERM_DOCS_SENTINEL;
228
229    return child->current->get_freq(child->current);
230}
231
232SV*
233Kino1_MultiTermDocs_get_positions(TermDocs *term_docs) {
234    MultiTermDocsChild *child;
235    child = (MultiTermDocsChild*)term_docs->child;
236
237    if (child->current == NULL)
238        return &PL_sv_undef;
239
240    return child->current->get_positions(child->current);
241}
242
243
244U32
245Kino1_MultiTermDocs_bulk_read(TermDocs *term_docs, SV *doc_nums_sv,
246                             SV *freqs_sv, U32 num_wanted) {
247    MultiTermDocsChild *child;
248    U32                 i, num_got, base;
249    U32                *doc_nums;
250
251    child = (MultiTermDocsChild*)term_docs->child;
252
253    while (1) {
254        /* move to the next SegTermDocs */
255        while (child->current == NULL) {
256            if (child->pointer < child->num_subs) {
257                child->base = child->starts[ child->pointer ];
258                child->current = child->sub_term_docs[ child->pointer ];
259                child->pointer++;
260            }
261            else {
262                return 0;
263            }
264        }
265
266        num_got = child->current->bulk_read(
267            child->current, doc_nums_sv, freqs_sv, num_wanted );
268
269        if (num_got == 0) {
270            /* no more docs left in this segment */
271            child->current = NULL;
272        }
273        else {
274            /* add the start offset for this seg to each doc */
275            base = child->base;
276            doc_nums = (U32*)SvPVX(doc_nums_sv);
277            for (i = 0; i < num_got; i++) {
278                *doc_nums++ += base;
279            }
280
281            return num_got;
282        }
283    }
284}
285
286bool
287Kino1_MultiTermDocs_next(TermDocs* term_docs) {
288    MultiTermDocsChild *child;
289    child = (MultiTermDocsChild*)term_docs->child;
290
291    if ( child->current != NULL && child->current->next(child->current) ) {
292        return 1;
293    }
294    else if (child->pointer < child->num_subs) {
295        /* try next segment */
296        child->base    = child->starts[ child->pointer ];
297        child->current = child->sub_term_docs[ child->pointer ];
298        child->pointer++;
299        return term_docs->next(term_docs); /* recurse */
300    }
301    else {
302        /* done with all segments */
303        return 0;
304    }
305}
306
307bool
308Kino1_MultiTermDocs_skip_to(TermDocs *term_docs, U32 target) {
309    MultiTermDocsChild *child = (MultiTermDocsChild*)term_docs->child;
310
311    if (   child->current != NULL
312        && child->current->skip_to(child->current, (target - child->base))
313    ) {
314        return TRUE;
315    }
316    else if (child->pointer < child->num_subs) {
317        /* try next segment */
318        child->base    = child->starts[ child->pointer ];
319        child->current = child->sub_term_docs[ child->pointer ];
320        child->pointer++;
321        return term_docs->skip_to(term_docs, target); /* recurse */
322    }
323    else {
324        return FALSE;
325    }
326}
327
328void
329Kino1_MultiTermDocs_destroy(TermDocs* term_docs) {
330    MultiTermDocsChild *child;
331    child = (MultiTermDocsChild*)term_docs->child;
332
333    SvREFCNT_dec(child->sub_term_docs_avref);
334    Kino1_Safefree(child->sub_term_docs);
335    Kino1_Safefree(child->starts);
336    Kino1_Safefree(child);
337
338    Kino1_TermDocs_destroy(term_docs);
339}
340
341__POD__
342
343==begin devdocs
344
345==head1 NAME
346
347KinoSearch1::Index::MultiTermDocs - multi-segment TermDocs
348
349==head1 DESCRIPTION
350
351Multi-segment implementation of KinoSearch1::Index::TermDocs.
352
353==head1 COPYRIGHT
354
355Copyright 2005-2010 Marvin Humphrey
356
357==head1 LICENSE, DISCLAIMER, BUGS, etc.
358
359See L<KinoSearch1> version 1.01.
360
361==end devdocs
362==cut
363